diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..e0116c1
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,7 @@
+
+data/gene_name_info/query_full_name.txt
+data/gene_name_info/query_ids.txt
+data/gene_name_info/query_snps.txt
+data/gene_name_info/query_symbol.txt
+results/baseline_doc/pubmed.zinc.0.15.txt
+results/baseline_doc/pubmed.zinc.1.15.txt
diff --git a/REPRODUCING.md b/REPRODUCING.md
new file mode 100644
index 0000000..3146537
--- /dev/null
+++ b/REPRODUCING.md
@@ -0,0 +1,36 @@
+This [Code Ocean](https://codeocean.com) Compute Capsule will allow you to reproduce the results published by the author on your local machine<sup>1</sup>. Follow the instructions below, or consult [our knowledge base](https://help.codeocean.com/user-manual/sharing-and-finding-published-capsules/exporting-capsules-and-reproducing-results-on-your-local-machine) for more information. Don't hesitate to reach out to [Support](mailto:support@codeocean.com) if you have any questions.
+
+<sup>1</sup> You may need access to additional hardware and/or software licenses.
+
+# Prerequisites
+
+- [Docker Community Edition (CE)](https://www.docker.com/community-edition)
+- [nvidia-container-runtime](https://docs.docker.com/config/containers/resource_constraints/#gpu) for code that leverages the GPU
+- MATLAB/MOSEK/Stata licenses where applicable
+
+# Instructions
+
+## The computational environment (Docker image)
+
+This capsule is private and its environment cannot be downloaded at this time. You will need to rebuild the environment locally.
+
+> If there's any software requiring a license that needs to be run during the build stage, you'll need to make your license available. See [our knowledge base](https://help.codeocean.com/user-manual/sharing-and-finding-published-capsules/exporting-capsules-and-reproducing-results-on-your-local-machine) for more information.
+
+In your terminal, navigate to the folder where you've extracted the capsule and execute the following command:
+```shell
+cd environment && docker build . --tag 6ef700ed-ff07-4a42-bf13-65d4165511b6; cd ..
+```
+
+> This step will recreate the environment (i.e., the Docker image) locally, fetching and installing any required dependencies in the process. If any external resources have become unavailable for any reason, the environment will fail to build.
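+
+As an optional sanity check (assuming a standard Docker installation; this is not part of the official instructions), you can confirm that the image was built before moving on:
+```shell
+# list local images and look for the tag used in the build step above
+docker image ls | grep 6ef700ed
+```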
+
+## Running the capsule to reproduce the results
+
+In your terminal, navigate to the folder where you've extracted the capsule and execute the following command, adjusting parameters as needed:
+```shell
+docker run --platform linux/amd64 --rm --gpus all \
+  --workdir /code \
+  --volume "$PWD/data":/data \
+  --volume "$PWD/code":/code \
+  --volume "$PWD/results":/results \
+  6ef700ed-ff07-4a42-bf13-65d4165511b6 bash run
+```
diff --git a/code/Extrinsic_application_CVD_prediction.py b/code/Extrinsic_application_CVD_prediction.py
new file mode 100644
index 0000000..9617b75
--- /dev/null
+++ b/code/Extrinsic_application_CVD_prediction.py
@@ -0,0 +1,128 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Tue Aug 30 21:59:06 2022
+
+@author: Jihye Moon
+"""
+import sys
+import os
+import pathlib
+
+import pandas as pd
+import numpy as np
+
+from sklearn.preprocessing import StandardScaler
+from sklearn.model_selection import StratifiedShuffleSplit as strata
+
+import lib.ML_models as ml
+sys.path.append('lib')
+import loading_literature_embedding as emb
+
+def data_split(X_train_index, X_test_index, X, y):
+    # split the held-out indices in half: first half for test, second half for validation
+    valid_data = int(len(X_test_index)/2)
+    test_data = int(len(X_test_index))-valid_data
+
+    test = X_test_index[0:test_data]; valid = X_test_index[test_data:test_data+valid_data]
+
+    X_train = X[X_train_index]; X_test = X[test]; X_valid = X[valid]
+
+    y_train = y[X_train_index]
+    y_test = y[test]
+    y_valid = y[valid]
+
+    X_train = np.reshape(X_train, (X_train.shape[0], -1)); X_test = np.reshape(X_test, (X_test.shape[0], -1))
+    X_valid = np.reshape(X_valid, (X_valid.shape[0], -1))
+    y_train = np.squeeze(y_train); y_test = np.squeeze(y_test); y_valid = np.squeeze(y_valid)
+
+    # standardize features using statistics from the training split only
+    scaler = StandardScaler()
+    scaler.fit(X_train)
+    X_train = scaler.transform(X_train); X_test = scaler.transform(X_test); X_valid = scaler.transform(X_valid)
+    return X_train, X_test, X_valid, y_train, y_test, y_valid
+
+def loading_variable_embedding(data_path):
+    var_symbol = list(pd.read_csv(data_path+'/variables_symbol.csv').drop(columns='Unnamed: 0')['0'])
+    var_name = list(pd.read_csv(data_path+'/variables_preprocessed_names.csv').drop(columns='Unnamed: 0')['0'])
+    tar_symbol = list(pd.read_csv(data_path+'/target_variables_symbol.csv').drop(columns='Unnamed: 0')['0'])
+    tar_name = list(pd.read_csv(data_path+'/target_variables_preprocessed_names.csv').drop(columns='Unnamed: 0')['0'])
+
+    variables_indexing={}; disease_variables_indexing={}
+
+    for i in range(len(var_name)):
+        variables_indexing[var_symbol[i]] = var_name[i]
+
+    for i in range(len(tar_name)):
+        disease_variables_indexing[tar_symbol[i]] = tar_name[i]
+
+    additional_dictionary = {'uricosurics':'uricosuric'}
+    # If a variable name is so unusual that it cannot be found in the embedding vocabulary,
+    # add it to this dictionary to avoid errors during the feature selection tasks.
+
+    embedding_list, index2variables, embedding, removal, removed_words = emb2simi.variable2embed(words_list, syn0norm, variables_indexing, additional_dictionary)
+
+    if removal==[]:
+        print(" === No problems with your variable names")
+        target_embedding_list, index2target, target_embedding, _, _ = emb2simi.variable2embed(words_list, syn0norm, disease_variables_indexing, additional_dictionary)
+
+        return embedding_list, variables_indexing, disease_variables_indexing, additional_dictionary, \
+            target_embedding_list, index2target, index2variables, target_embedding, embedding
+    else:
+        print(" === Check your variable names for errors")
+        return 0, 0, 0, 0, 0, 0, 0, 0, 0
+
+def 
CVD_Prediction_with_FS_DR(data_path, Xt, y):
+    feature_size = 128; i=0
+    split_info = strata(n_splits=5, test_size=0.2, random_state=12)
+    total_FS_Pre=[]; total_FS_prob=[]
+    total_DR_pre=[]; total_DR_prob=[]
+    embedding_list, variables_indexing, disease_variables_indexing, additional_dictionary, target_embedding_list, index2target, index2variables, target_embedding, embedding = loading_variable_embedding(data_path)
+    for X_train_index, X_test_index in split_info.split(Xt.values, y):
+        result_dir = os.path.join(output_path +str(i))
+        pathlib.Path(result_dir).mkdir(parents=True, exist_ok=True)
+        X_train, X_test, X_valid, y_train, y_test, y_valid = data_split(X_train_index, X_test_index, Xt.values, y)
+        pr.save_label(y_test, 'CVD_label', result_dir) # save y_test labels to evaluate CVD prediction performance for each fold
+        print("=== Running our feature selector --- our FS selects features by feature name and uses the same feature set for 5-fold cross-validation.")
+        embed_name = fs.Our_FS(emb2simi, str(i)+'rf_embedding_features', embedding_list, variables_indexing, disease_variables_indexing, additional_dictionary, embedding, target_embedding_list, index2target, index2variables, target_embedding, feature_size, result_dir)
+
+        print("=== Running our dimensionality reducer")
+        A1, A2, A3 = dr.Our_DR(embedding, X_train, X_test, X_valid, feature_size)
+
+        print("=== Running MLs with Feature Selection (Our FS)")
+        X2 = Xt[embed_name].values ### keep only the 128 variables selected by our FS
+        valid_data = int(len(X_test_index)/2); test_data = int(len(X_test_index))-valid_data
+        test = X_test_index[0:test_data]; valid = X_test_index[test_data:test_data+valid_data] # split the held-out data into test and validation
+        X_train2 = X2[X_train_index]; X_test2 = X2[test]; X_valid2 = X2[valid]
+
+        X_train2 = np.reshape(X_train2, (X_train2.shape[0], -1))
+        X_test2 = np.reshape(X_test2, (X_test2.shape[0], -1))
+        X_valid2 = np.reshape(X_valid2, (X_valid2.shape[0], -1))
+
+        scaler = StandardScaler()
+        scaler.fit(X_train2)
+        X_train2 = scaler.transform(X_train2); X_test2 = scaler.transform(X_test2); X_valid2 = scaler.transform(X_valid2)
+
+        Our_FS_total_prediction, Our_FS_total_prob = pr.run_save(X_train2, y_train, X_test2, y_test, X_valid2, y_valid, 'FS.embedding', 'SMOTE', feature_size, result_dir)
+        total_FS_Pre.append(Our_FS_total_prediction); total_FS_prob.append(Our_FS_total_prob)
+        print("=== Running MLs with Dimensionality Reduction (Our DR)")
+        Our_DR_total_prediction, Our_DR_total_prob = pr.run_save(A1, y_train, A2, y_test, A3, y_valid, 'DR.embedding', 'SMOTE', feature_size, result_dir)
+        total_DR_pre.append(Our_DR_total_prediction); total_DR_prob.append(Our_DR_total_prob) # accumulate the DR predictions and probabilities
+        i+=1
+    print('all results are saved in ', output_path)
+    return total_FS_Pre, total_FS_prob, total_DR_pre, total_DR_prob
+
+data_path = '../data/Example'
+model_path = '../data/old_model'
+output_path = '../results/prediction/'
+
+fs = ml.feature_selectors()
+dr = ml.dimension_reducers()
+pr = ml.predictors()
+
+gene_name = '../data/gene_name_info/query_full_name'; gene_symb='../data/gene_name_info/query_symbol'
+emb2simi=emb.embedding_vector()
+
+words_list, index2word, syn0norm, _ = emb2simi.setting(model_path, gene_symb)
+
+Xt = pd.read_csv(data_path+'/Example_X.csv').drop(columns='Unnamed: 0')
+y = pd.read_csv(data_path+'/Example_y.csv').drop(columns='Unnamed: 0').values
+
+total_FS_Pre, total_FS_prob, total_DR_pre, total_DR_prob = CVD_Prediction_with_FS_DR(data_path, Xt, y)
diff --git a/code/LICENSE b/code/LICENSE
new file mode 100644
index 
0000000..08320cf
--- /dev/null
+++ b/code/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2022 Jihye Moon
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/code/README.md b/code/README.md
new file mode 100644
index 0000000..c87cead
--- /dev/null
+++ b/code/README.md
@@ -0,0 +1,266 @@
+#### **A Literature Embedding Model for Cardiovascular Disease Prediction using Risk Factors, Symptoms, and Genotype Information**
+##### Authors: Jihye Moon, Hugo F. Posada-Quintero, and *Ki H. Chon
+**Contact address**: ki.chon@uconn.edu (*corresponding author), jihye.moon@uconn.edu (questions about the code).
+(Accepted by Expert Systems with Applications on August 24, 2022)
+
+### Contents
+
+This capsule provides the implementation of 1) **literature data collection and preprocessing** and 2) **literature embedding model training and evaluation**. The pre-trained literature embedding model identifies CVD risk factors and associated information for a given input query (e.g., stroke). Also, since our literature embedding model contains representations for CVD-related words, it can serve as a **feature selection (FS) or dimensionality reduction (DR) model on cohort data** for CVD prediction/classification tasks (extrinsic method). We used MESA cohort data consisting of 6,814 subjects and 564 variables in our manuscript. Since our cohort data requires permission, users must prepare their own cohort data to use the literature embedding model for FS or DR tasks. The cohort data must contain variables per subject and the variables' names. This capsule's guideline also provides a pipeline for FS and DR on input cohort data.
+
+1. [Introduction](#introduction)
+2. [Code Implementations and Guidelines](#guidelines)
+    0. [DEMO](#demo)
+        1. [DEMO A) CVD risk factors, genes, and associated information identifications](#demo1)
+        2. [DEMO B) All steps to build a literature embedding model (data collection ~ model training)](#demo2)
+        3. [Reproduction DEMO](#default_demo)
+    1. [Literature data collection](#collection)
+    2. [Literature data preprocessing](#preprocessing)
+    3. [Literature embedding model training](#training)
+    4. [Literature embedding model evaluation](#evaluation)
+    5. [FS and DR applications on cohort data](#applications)
+3. [Results](#results)
+4. [GitHub Source](#github)
+
+### 1. Introduction
+Accurate prediction of cardiovascular disease (CVD) requires multifaceted information consisting of not only a patient's medical history, but also genomic data, symptoms, lifestyle, and risk factors, which are often not incorporated into the decision-making process because the data are vast, difficult to obtain, and require complex algorithms. **Estimating CVD risk factors is now a significant goal for more accurate CVD prediction and treatment**.
+##### Previous work's limitations
+CVD risk factors can be identified from phenotype variables, genetic arrays, text, and image data. Several approaches have been introduced to identify CVD risk factors; they fall into two categories: (1) cohort-based CVD risk factor identification, and (2) literature-based CVD risk factor identification and information management. Category (1) enables objective validation of the identified risk factors using CVD patient data, but the number of available features is limited, which may limit the identification of new CVD risk factors. Category (2) enables the management of significant risk factors using publicly available literature data; however, most methods were not validated using CVD patient data. Hence, **it is critical to develop a novel method that collects information on risk factors, associated symptoms, and mechanisms, and that is objectively validated on CVD patients to be relevant for better clinical diagnosis and treatment management.**
+##### Our proposed work
+In our paper, **we proposed a literature embedding model trained on literature data freely accessible online.** Our model enables the retrieval of CVD risk factors, associated information, and genes independently of population-based data. Even though our literature model was trained only on literature, it selects accurate CVD-related features from population-based cohort data when used as an FS or DR model, which leads to better CVD prediction.
+
+### 2. Code implementation and guidelines
+This section provides descriptions for [0. Demo](#demo) and details for [1. Literature data collection](#collection), [2. Literature data preprocessing](#preprocessing), [3. Literature embedding model training](#training), [4. Literature embedding model evaluation](#evaluation), and [5. FS and DR applications on cohort data](#applications).
+The DEMO gives an overview of our code, and the other five subsections describe the code for each purpose in detail.
+
+We prepared five main scripts, one per goal:
+ 1) step1_data_collection.py,
+ 2) step2_data_preprocessing.py,
+ 3) step3_literature_embedding_training.py,
+ 4) step4_CVD_risk_factor_identification.py,
+ 5) Extrinsic_application_CVD_prediction.py.
+
+We feed different inputs to each main script for each purpose. Details are described below.
+
+#### 2.0. DEMO
+
+We prepared three DEMOs:
+ 1) **DEMO A**: identifies **CVD risk factors, genes, and associated information** using a pre-trained literature model.
+ 2) **DEMO B**: runs all steps of **literature data collection**, **literature data preprocessing**, and **literature embedding model training and intrinsic evaluation (CVD risk factor identifications)**.
+ 3) **Reproduction DEMO**: shows DEMO A's results and provides the **literature embedding model training and evaluation steps**.
+
+On the CodeOcean platform, DEMO A is the default.
+
+##### 2.0.1. 
DEMO A) CVD risk factors, genes, and associated information identifications
+To run DEMO A, run the following command:
+~~~~ {.sourceCode .shell}
+./run.sh 'demo_a'
+~~~~
+
+The command imports our pre-trained literature embedding model at EMBEDDING_PATH='../data/old_model' and captures CVD risk factors and associated information for three queries ('stroke', 'atrial fibrillation', 'ventricular fibrillation').
+The risk factors, associated information, and gene names related to each input query will be displayed and saved in STEP4_OUTPUT_PATH='../results/demo_a'.
+
+##### 2.0.2. DEMO B) All steps to build a literature embedding model (data collection ~ model training)
+To run DEMO B, run the following command on **your local computer**:
+~~~~ {.sourceCode .shell}
+./run.sh 'demo_b'
+~~~~
+DEMO B runs all steps of literature data collection & preprocessing and literature embedding model training & evaluation for CVD risk factor identifications. DEMO B collects only a limited amount of literature data; to collect all available data, set NUM_WORD_BASED_DATA=0 and NUM_GENE_BASED_DATA=0.
+
+~~~~ {.sourceCode .shell}
+./run.sh 'demo_b'
+  echo 'demo b -- '
+  QUERY_WORD='zinc' ## you can define the query word used to collect literature data
+  NUM_WORD_BASED_DATA=500000 #if NUM_WORD_BASED_DATA=0, it collects all possible word-related literature
+  NUM_GENE_BASED_DATA=100 #if NUM_GENE_BASED_DATA=0, it collects all possible gene-related literature
+  BASE_PATH='../results/'
+  DATA_COLLECTION_PATH='../results/demo_b'
+  PREPROCESSEING_PATH='../results/demo_b'
+  EMBEDDING_NAME='pre_trained_demo'
+  EMBEDDING_PATH='../results/pre_trained_demo'
+  EPOCH=2
+  STEP4_OUTPUT_PATH='../results/CVD_searches'
+
+  python -u step1_data_collection.py $QUERY_WORD $NUM_WORD_BASED_DATA $NUM_GENE_BASED_DATA $DATA_COLLECTION_PATH
+  python -u step2_data_preprocessing.py $DATA_COLLECTION_PATH $PREPROCESSEING_PATH
+  python -u step3_literature_embedding_training.py $PREPROCESSEING_PATH $EPOCH $EMBEDDING_NAME
+  python -u step4_CVD_risk_factor_identification.py $EMBEDDING_NAME $STEP4_OUTPUT_PATH
+~~~~
+
+DEMO B writes the collected literature data, the pre-processed literature data, and the trained literature embedding model to './results'.
+
+##### 2.0.3. Reproduction DEMO
+
+The reproduction DEMO is run with the following command:
+~~~~ {.sourceCode .shell}
+./run.sh
+
+or
+
+./run.sh 'demo_r'
+~~~~
+
+This reproduction DEMO shows 1) CVD risk factor identifications using our paper's pre-trained literature model and 2) all steps of the literature model training process plus risk factor searches using the newly trained model.
+For 2), we prepared a collected literature data set at PREPROCESSEING_PATH='../data/old_preprocessed_data'.
+
+#### 2.1. Literature data collection
+This subsection explains step1_data_collection.py in detail. The script receives four inputs:
+
+~~~~ {.sourceCode .shell}
+  QUERY_WORD='zinc'
+  NUM_WORD_BASED_DATA=0
+  NUM_GENE_BASED_DATA=0
+  DATA_COLLECTION_PATH='../results/$USER_DEFINED'
+
+  python -u step1_data_collection.py $QUERY_WORD $NUM_WORD_BASED_DATA $NUM_GENE_BASED_DATA $DATA_COLLECTION_PATH
+~~~~
+
+In our manuscript, we collected 16k published articles from PubMed using search keywords consisting of a word ("heart") and human gene names, then trained a literature embedding model on the collected abstracts. Table 1 below shows examples of abstracts collected by this script.
+
+*Table 1. An example of collected abstracts*
+| Document type | Keyword | Example |
+|:---|:---|:---|
+|Keyword-based literature from PubMed|Heart|Waist-to-hip ratio (WHR) is a strong predictor of mortality in patients with **heart** failure (HF). Left ventricular diastolic filling function has predictable maturational progression, with significant differences in the intraventricular pressure difference between infants from birth to 2 years. |
+|Gene-name-based literature from PubMed|HMGA1|**HMGA1** has been shown to regulate genes involved with systemic inflammatory processes. We hypothesized that **HMGA1** is important in the function of mesenchymal stromal cells, which are known to modulate inflammatory responses due to sepsis.|
+
+We can change the number of collected documents:
+~~~
+If NUM_WORD_BASED_DATA==0:
+    It collects all documents for $QUERY_WORD.
+elif NUM_WORD_BASED_DATA==100000:
+    It collects 100,000 documents for $QUERY_WORD.
+
+If NUM_GENE_BASED_DATA==0:
+    It collects all possible gene-related documents.
+elif NUM_GENE_BASED_DATA==10:
+    It collects documents for 10*NUM_GENE_BASED_DATA gene names.
+~~~
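+
+For orientation, the sketch below shows the kind of Bio.Entrez calls that such a collection step relies on (compare code/gene_extraction.py in this capsule); the query term, retmax value, and email address here are illustrative, not the script's actual parameters:
+
+~~~~ {.sourceCode .python}
+from Bio import Entrez
+
+Entrez.email = "your.email@example.com"  # NCBI requires a contact address
+
+# find PubMed IDs matching a keyword query (illustrative retmax)
+handle = Entrez.esearch(db="pubmed", term="heart", retmax=100)
+record = Entrez.read(handle)
+handle.close()
+
+# fetch the matching abstracts as plain text
+handle = Entrez.efetch(db="pubmed", id=record["IdList"], rettype="abstract", retmode="text")
+abstracts = handle.read()
+handle.close()
+print(abstracts[:500])
+~~~~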
+
+#### 2.2. Literature data preprocessing
+This subsection explains step2_data_preprocessing.py in detail. The script receives two inputs:
+~~~
+  DATA_COLLECTION_PATH='../results/$USER_DEFINED'
+  PREPROCESSEING_PATH='../results/$USER_DEFINED'
+
+  python -u step2_data_preprocessing.py $DATA_COLLECTION_PATH $PREPROCESSEING_PATH
+~~~
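+
+As a rough illustration of what this step does (the real rules live in step2_data_preprocessing.py), the sketch below applies the same kind of normalization shown in Table 2 below: lowercasing, dropping stop words, masking digits with '#', and tagging gene symbols with a leading '#'. The stop-word list and the normalize_abstract helper are hypothetical simplifications, not the actual implementation:
+
+~~~~ {.sourceCode .python}
+import re
+
+STOP_WORDS = {'a', 'an', 'the', 'of', 'and', 'to', 'in', 'during', 'due'}  # toy list
+
+def normalize_abstract(text, gene_symbols):
+    tokens = []
+    for word in text.split():
+        token = word.strip('.,;:()')
+        if token in gene_symbols:             # tag known gene symbols, e.g. HMGA1 -> #HMGA1
+            tokens.append('#' + token)
+            continue
+        token = token.lower()
+        token = re.sub(r'[0-9]', '#', token)  # mask digits, e.g. A1 -> a#
+        if token and token not in STOP_WORDS:
+            tokens.append(token)
+    return ' '.join(tokens)
+
+print(normalize_abstract('HMGA1 transgene exhibit improved function during sepsis.', {'HMGA1'}))
+# -> '#HMGA1 transgene exhibit improved function sepsis'
+~~~~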
+*Table 2. An example of text preprocessing*
+| Document | Gene Name | Sentence |
+|:---|:---|:---|
+| Original | HMGA1 | Mesenchymal stromal cells expressing a dominant-negative high mobility group A1 transgene exhibit improved function during sepsis. |
+| Pre-processed | #HMGA1 | mesenchymal stromal cells expressing dominant-negative high mobility group a# transgene exhibit improved function sepsis |
+
+
+#### 2.3. Literature embedding model training
+This subsection explains step3_literature_embedding_training.py in detail. The script receives three inputs:
+~~~
+  EMBEDDING_PATH='../results/$MODEL_PATH'
+  EPOCH=2 # the number of epochs for literature embedding model training
+
+  python -u step3_literature_embedding_training.py $PREPROCESSEING_PATH $EPOCH $EMBEDDING_PATH
+~~~
+EMBEDDING_PATH is the embedding model path and EPOCH is the number of training epochs; EPOCH=10 is recommended.
+Our literature embedding model learns literature representations in three steps. To learn 'heart'-related literature, the model trains a basic skip-gram structure, as shown in Fig. 1(a). To learn gene-name-related literature, the model uses the structures in Fig. 1(b) and (c).
+
+Fig. 1. Skip-gram structure of Word2vec
+| (a) step 1| (b) step 2| (c) step 3|
+| :--- | :--- | :--- |
+| ![image](read_me_images/model1_re.jpg)|![image](read_me_images/model2_re.jpg)|![image](read_me_images/model3_re.jpg)|
+|Skip-gram structure to predict context words using a center word in the same document|Our proposed structure (1) to predict a captured document's word contexts from the gene name used as the search query|Our proposed structure (2) to predict gene-name-associated words in a captured document using the gene name|
+
+Users can set hyper-parameters in step3_literature_embedding_training.py:
+~~~~ {step3_literature_embedding_training.py}
+  window_size = 2 # The number of context words per center word for literature model training. Details are in our manuscript.
+  min_count = 5 # Words whose appearance frequency in the documents is lower than min_count are excluded.
+  min_size = 2 # Words with character length <= min_size are excluded.
+  dimension = 128 # Embedding model's dimension
+  num_sampled = 16 # Negative sampling parameter
+  batch_size = 256 # Training batch size
+~~~~
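+
+To make the skip-gram step concrete, the toy sketch below generates (center word, context word) training pairs with window_size = 2, as in Fig. 1(a). It is a simplification of what the training script does internally, not the actual implementation:
+
+~~~~ {.sourceCode .python}
+def skipgram_pairs(tokens, window_size=2):
+    pairs = []
+    for center, word in enumerate(tokens):
+        # context = up to window_size words on each side of the center word
+        for offset in range(-window_size, window_size + 1):
+            context = center + offset
+            if offset != 0 and 0 <= context < len(tokens):
+                pairs.append((word, tokens[context]))
+    return pairs
+
+print(skipgram_pairs(['waist', 'ratio', 'predictor', 'mortality', 'heart']))
+# e.g. ('predictor', 'waist'), ('predictor', 'ratio'), ('predictor', 'mortality'), ...
+~~~~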
+
+#### 2.4. Literature embedding model evaluation (CVD risk factor searches)
+This subsection explains step4_CVD_risk_factor_identification.py in detail. The script receives two inputs:
+
+~~~~ {.sourceCode .shell}
+  EMBEDDING_PATH='../results/$MODEL_PATH'
+  STEP4_OUTPUT_PATH='../results/$SEARCH_PATH'
+  python -u step4_CVD_risk_factor_identification.py $EMBEDDING_PATH $STEP4_OUTPUT_PATH
+~~~~
+
+Users can put their own queries in step4_CVD_risk_factor_identification.py, like below:
+
+~~~~ {.sourceCode .python}
+queries = ['stroke', 'atrial fibrillation', 'ventricular fibrillation'] #put your own queries in []
+~~~~
+
+#### 2.5. FS and DR applications on cohort data
+This subsection explains Extrinsic_application_CVD_prediction.py in detail. The script has three inputs, set inside Extrinsic_application_CVD_prediction.py:
+ * data_path = '../data/Example'
+ * model_path = '../data/old_model'
+ * output_path = '../results/prediction/'
+
+Users are required to prepare cohort data, a pre-trained embedding model path, and an output path. After running Extrinsic_application_CVD_prediction.py on the user's cohort data, the prediction results and labels produced by our FS and DR processes for each fold of the K-fold cross-validation are saved at output_path. CVD prediction performance can then be evaluated with performance_metrics.metric(label, prediction_results) from lib/performance_metrics.py, as in the sketch below.
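+
+A minimal sketch of that evaluation loop follows; the fold directory layout and file names here are assumptions for illustration, so check what pr.save_label and pr.run_save actually write under output_path in your run:
+
+~~~~ {.sourceCode .python}
+import numpy as np
+import lib.performance_metrics as performance_metrics
+
+output_path = '../results/prediction/'
+for fold in range(5):  # one result directory per cross-validation fold
+    # hypothetical file names -- adjust to the files saved in your run
+    label = np.loadtxt(output_path + str(fold) + '/CVD_label.txt')
+    prediction_results = np.loadtxt(output_path + str(fold) + '/FS.embedding_prediction.txt')
+    performance_metrics.metric(label, prediction_results)
+~~~~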
+All other ML methods (RF, DT, H2FS, PCA, and UMAP) are in the lib/ML_models.py file.
+
+##### Cohort data format #####
+
+Users are required to prepare cohort data with variable names. To show the format of the input data, we generated the example data Example_X (variables per subject) and Example_y (CVD labels per subject) using lib/ExpCohort_Generator.py. Details are in the lib/ExpCohort_Generator.py file; a minimal sketch of the expected format follows.
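+
+As a hedged illustration (hypothetical values and label column name; the actual generator is lib/ExpCohort_Generator.py), a DataFrame with one row per subject and one named variable per column, plus a one-column label file, is all the pipeline expects:
+
+~~~~ {.sourceCode .python}
+import numpy as np
+import pandas as pd
+
+rng = np.random.default_rng(0)
+X = pd.DataFrame(rng.random((5, 3)), columns=['bca', 'nit', 'fhha'])  # variables per subject
+y = pd.DataFrame(rng.integers(0, 2, 5), columns=['CVD'])              # 1 = CVD, 0 = no CVD
+X.to_csv('Example_X.csv'); y.to_csv('Example_y.csv')
+~~~~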
+
+The **format** of the input cohort data (Example_X) should be like below:
+
+*Table 3. The data format example generated by ExpCohort_Generator.py (variable)*
+| Subject | bca | nit | fhha | sbld | pulrate |
+|-----|----------|----------|----------|----------|----------|
+| 0 | 0.296735 | 0.292552 | 0.074269 | 0.886255 | 0.235104 |
+| 1 | 0.699152 | 0.626459 | 0.917815 | 0.988134 | 0.167721 |
+| 2 | 0.484408 | 0.327285 | 0.351393 | 0.946728 | 0.366808 |
+| 3 | 0.970385 | 0.811354 | 0.068369 | 0.246754 | 0.198345 |
+| .. | ... | ... | ... | ... | ... |
+| N | 0.905146 | 0.855485 | 0.657306 | 0.385825 | 0.957396 |
+
+The **format** of the CVD label per subject (Example_y) should be like below:
+
+*Table 4. The cohort data format example generated by ExpCohort_Generator.py (label)*
+| Subject | CVD (Yes=1, No=0) |
+|:---|:---|
+| 1 | 0 |
+| 2 | 1 |
+| 3 | 1 |
+| ... | ... |
+| N | 0 |
+
+### 3. Results
+
+In our manuscript, we used three queries ('stroke', 'atrial fibrillation', 'ventricular fibrillation') for CVD risk factor identifications. We analyzed whether or not the captured words and genes were correctly identified as risk factors and associated symptoms for the input query words. Our model accurately (average accuracy >96%) captured associated risk factors, symptoms, and genes for a given input query word. Details are described in our published manuscript.
+
+We also used our embedding model for FS and DR tasks on cohort data for CVD prediction. Our FS and DR methods provide better performance with the fastest computation time when compared with other popular FS and DR methods: Random Forest, Decision Tree, H2FS, UMAP, and PCA.
+
+Our model has the potential to facilitate easier collation of multifaceted information for better data mining of vast publicly available data, so that efficient and accurate risk factors and symptoms can be identified, which helps better-informed decisions for CVD prediction and treatment.
+
+### 4. GitHub Source
+-------------
+
+This project is also hosted on GitHub ([link](https://github.com/JihyeMooon/CVD_literature_embedding)) and is actively developed.
+
+### Error note
+In the literature data collection process, some errors can happen due to network connections.
+
+If you get an error at the 25/33 point in 'collecting_doc_using_word_based_query', like below:
+~~~
+ 25 / 33
+ Going to download records from 1250001 to 1260000
+ Going to download records from 1260001 to 1270000
+
+ raise HTTPError(req.full_url, code, msg, hdrs, fp)
+ or IncompleteRead: IncompleteRead(20458171 bytes read)
+~~~
+then run collecting_doc_using_word_based_query again with 'w2d_starting_point = 25'.
+
+If you have problems in 'collecting_doc_using_gene_based_query', like below:
+~~~
+ Example: if we get an error at 5 / 2634
+~~~
+then run collecting_doc_using_gene_based_query again with 'g2d_starting_point = 5'.
\ No newline at end of file
diff --git a/code/gene_extraction.py b/code/gene_extraction.py
new file mode 100644
index 0000000..c90b3b9
--- /dev/null
+++ b/code/gene_extraction.py
@@ -0,0 +1,134 @@
+from Bio import Entrez
+from Bio import SeqIO
+import time
+from urllib.error import HTTPError
+from http.client import IncompleteRead
+
+# Set your email address and API key for Entrez
+Entrez.email = "lrmercadod@gmail.com"
+Entrez.api_key = "f095f0c0aad9480d90ee0b869acb43670d08"
+
+# Search for human genes in the Gene database
+handle = Entrez.esearch(db="gene", term="Homo sapiens[Organism]", retmax=10000000)
+human_record = Entrez.read(handle)
+handle.close()
+
+# Search for the human ZIP11 gene
+handle = Entrez.esearch(db="gene", term="ZIP11 AND Homo sapiens[Organism]", retmax=10000000)
+human_zip11_record = Entrez.read(handle)
+handle.close()
+
+# Search for the mouse ZIP11 gene
+handle = Entrez.esearch(db="gene", term="ZIP11 AND Mus musculus[Organism]", retmax=10000000)
+mouse_zip11_record = Entrez.read(handle)
+handle.close()
+
+# Get the lists of gene IDs
+human_gene_ids = human_record["IdList"]
+human_zip11_ids = human_zip11_record["IdList"]
+mouse_zip11_ids = mouse_zip11_record["IdList"]
+
+# Combine all gene IDs
+gene_ids = human_gene_ids + human_zip11_ids + mouse_zip11_ids
+
+# Open the output files
+symbol_file = open("query_symbol.txt", "a", encoding="utf-8") # Append mode
+id_file = open("query_ids.txt", "a", encoding="utf-8") # Append mode
+full_name_file = 
open("query_full_name.txt", "a", encoding="utf-8") # Append mode +snp_file = open("query_snps.txt", "a", encoding="utf-8") # Append mode +error_file = open("error_log.txt", "a", encoding="utf-8") # Append mode for error logging + +max_retries = 5 +retry_delay = 2 +batch_size = 500 +batch_delay = 2 + +# Load the last processed batch from the checkpoint file +checkpoint_file = "checkpoint.txt" +try: + with open(checkpoint_file, "r") as file: + last_processed_batch = int(file.read()) +except FileNotFoundError: + last_processed_batch = 0 + +# Iterate over the gene IDs in batches and fetch the gene information +for i in range(last_processed_batch * batch_size, len(gene_ids), batch_size): + batch_ids = gene_ids[i:i+batch_size] + + for gene_id in batch_ids: + retries = 0 + while retries < max_retries: + try: + handle = Entrez.efetch(db="gene", id=gene_id, retmode="xml") + gene_record = Entrez.read(handle) + handle.close() + break + except (HTTPError, IncompleteRead) as e: + print(f"Error: {str(e)}. Retrying...") + retries += 1 + time.sleep(retry_delay) + else: + print(f"Failed to fetch gene information for gene ID: {gene_id}") + continue + + # Extract the relevant information + if "Entrezgene_gene" in gene_record[0] and "Gene-ref" in gene_record[0]["Entrezgene_gene"]: + gene_ref = gene_record[0]["Entrezgene_gene"]["Gene-ref"] + gene_symbol = gene_ref.get("Gene-ref_locus", "") + gene_full_name = gene_ref.get("Gene-ref_desc", "") + else: + gene_symbol = "" + gene_full_name = "" + + # Retrieve SNP information for the gene + retries = 0 + while retries < max_retries: + try: + handle = Entrez.elink(dbfrom="gene", db="snp", id=gene_id) + snp_record = Entrez.read(handle) + handle.close() + + if snp_record[0]["LinkSetDb"]: + snp_ids = [link["Id"] for link in snp_record[0]["LinkSetDb"][0]["Link"]] + for snp_id in snp_ids: + try: + snp_file.write(str(snp_id) + "\n") + except OSError as e: + error_file.write(f"Error writing SNP ID {snp_id} for gene ID {gene_id}: {str(e)}\n") + else: + try: + snp_file.write("N/A\n") + except OSError as e: + error_file.write(f"Error writing 'N/A' to snp_file for gene ID {gene_id}: {str(e)}\n") + break + except (IndexError, RuntimeError, IncompleteRead) as e: + print(f"Error retrieving SNP information for gene ID: {gene_id}. Retrying...") + retries += 1 + time.sleep(retry_delay) + else: + print(f"Failed to retrieve SNP information for gene ID: {gene_id}") + try: + snp_file.write("N/A\n") + except OSError as e: + error_file.write(f"Error writing 'N/A' to snp_file for gene ID {gene_id}: {str(e)}\n") + + # Write the information to the respective files + symbol_file.write(gene_symbol + "\n") + id_file.write(gene_id + "\n") + full_name_file.write(gene_full_name + "\n") + + # Update the checkpoint file with the last processed batch + with open(checkpoint_file, "w") as file: + file.write(str(i // batch_size)) + + print(f"Processed batch {i//batch_size + 1} of {len(gene_ids)//batch_size + 1}") + time.sleep(batch_delay) + +# Close the output files +symbol_file.close() +id_file.close() +full_name_file.close() +snp_file.close() +error_file.close() + +print("Gene extraction completed.") \ No newline at end of file diff --git a/code/lib/Bio/Affy/CelFile.py b/code/lib/Bio/Affy/CelFile.py new file mode 100644 index 0000000..ee95b0d --- /dev/null +++ b/code/lib/Bio/Affy/CelFile.py @@ -0,0 +1,502 @@ +# Copyright 2004 by Harry Zuzan. All rights reserved. +# Copyright 2016 by Adam Kurkiewicz. All rights reserved. 
+# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. + +"""Reading information from Affymetrix CEL files version 3 and 4.""" + + +import struct + +try: + import numpy +except ImportError: + from Bio import MissingPythonDependencyError + + raise MissingPythonDependencyError( + "Install NumPy if you want to use Bio.Affy.CelFile" + ) from None + + +class ParserError(ValueError): + """Affymetrix parser error.""" + + def __init__(self, *args): + """Initialise class.""" + super().__init__(*args) + + +class Record: + """Stores the information in a cel file. + + Example usage: + + >>> from Bio.Affy import CelFile + >>> with open("Affy/affy_v3_example.CEL") as handle: + ... c = CelFile.read(handle) + ... + >>> print(c.ncols, c.nrows) + 5 5 + >>> print(c.intensities) + [[ 234. 170. 22177. 164. 22104.] + [ 188. 188. 21871. 168. 21883.] + [ 188. 193. 21455. 198. 21300.] + [ 188. 182. 21438. 188. 20945.] + [ 193. 20370. 174. 20605. 168.]] + >>> print(c.stdevs) + [[ 24. 34.5 2669. 19.7 3661.2] + [ 29.8 29.8 2795.9 67.9 2792.4] + [ 29.8 88.7 2976.5 62. 2914.5] + [ 29.8 76.2 2759.5 49.2 2762. ] + [ 38.8 2611.8 26.6 2810.7 24.1]] + >>> print(c.npix) + [[25 25 25 25 25] + [25 25 25 25 25] + [25 25 25 25 25] + [25 25 25 25 25] + [25 25 25 25 25]] + + """ + + def __init__(self): + """Initialize the class.""" + self.version = None + self.GridCornerUL = None + self.GridCornerUR = None + self.GridCornerLR = None + self.GridCornerLL = None + self.DatHeader = None + self.Algorithm = None + self.AlgorithmParameters = None + self.NumberCells = None + self.intensities = None + self.stdevs = None + self.npix = None + self.nrows = None + self.ncols = None + self.nmask = None + self.mask = None + self.noutliers = None + self.outliers = None + self.modified = None + + +def read(handle, version=None): + """Read Affymetrix CEL file and return Record object. + + CEL files format versions 3 and 4 are supported. + Please specify the CEL file format as 3 or 4 if known for the version + argument. If the version number is not specified, the parser will attempt + to detect the version from the file contents. + + The Record object returned by this function stores the intensities from + the CEL file in record.intensities. + Currently, record.mask and record.outliers are not set in when parsing + version 4 CEL files. + + Example Usage: + + >>> from Bio.Affy import CelFile + >>> with open("Affy/affy_v3_example.CEL") as handle: + ... record = CelFile.read(handle) + ... + >>> record.version == 3 + True + >>> print("%i by %i array" % record.intensities.shape) + 5 by 5 array + + >>> with open("Affy/affy_v4_example.CEL", "rb") as handle: + ... record = CelFile.read(handle, version=4) + ... + >>> record.version == 4 + True + >>> print("%i by %i array" % record.intensities.shape) + 5 by 5 array + + """ + try: + data = handle.read(0) + except AttributeError: + raise ValueError("handle should be a file handle") from None + data = handle.read(4) + if not data: + raise ValueError("Empty file.") + if data == b"[CEL": + raise ValueError("CEL file in version 3 format should be opened in text mode") + if data == "[CEL": + # Version 3 format. Continue to read the header here before passing + # control to _read_v3 to avoid having to seek to the beginning of + # the file. 
+ data += next(handle) + if data.strip() != "[CEL]": + raise ValueError("Failed to parse Affy Version 3 CEL file.") + line = next(handle) + keyword, value = line.split("=", 1) + if keyword != "Version": + raise ValueError("Failed to parse Affy Version 3 CEL file.") + version = int(value) + if version != 3: + raise ValueError("Incorrect version number in Affy Version 3 CEL file.") + return _read_v3(handle) + try: + magicNumber = struct.unpack(" max_size: + max_atoms = [atom] + max_size = atom_dict[atom] + elif atom_dict[atom] == max_size: + max_atoms.append(atom) + + if require_multiple and num_atoms == 1: + consensus += ambiguous + elif (len(max_atoms) == 1) and ( + (float(max_size) / float(num_atoms)) >= threshold + ): + consensus += max_atoms[0] + else: + consensus += ambiguous + + return Seq(consensus) + + def gap_consensus(self, threshold=0.7, ambiguous="X", require_multiple=False): + """Output a fast consensus sequence of the alignment, allowing gaps. + + Same as dumb_consensus(), but allows gap on the output. + + Things to do: + - Let the user define that with only one gap, the result + character in consensus is gap. + - Let the user select gap character, now + it takes the same as input. + + """ + consensus = "" + + # find the length of the consensus we are creating + con_len = self.alignment.get_alignment_length() + + # go through each seq item + for n in range(con_len): + # keep track of the counts of the different atoms we get + atom_dict = {} + num_atoms = 0 + + for record in self.alignment: + # make sure we haven't run past the end of any sequences + # if they are of different lengths + if n < len(record.seq): + if record.seq[n] not in atom_dict: + atom_dict[record.seq[n]] = 1 + else: + atom_dict[record.seq[n]] += 1 + + num_atoms += 1 + + max_atoms = [] + max_size = 0 + + for atom in atom_dict: + if atom_dict[atom] > max_size: + max_atoms = [atom] + max_size = atom_dict[atom] + elif atom_dict[atom] == max_size: + max_atoms.append(atom) + + if require_multiple and num_atoms == 1: + consensus += ambiguous + elif (len(max_atoms) == 1) and ( + (float(max_size) / float(num_atoms)) >= threshold + ): + consensus += max_atoms[0] + else: + consensus += ambiguous + + return Seq(consensus) + + def replacement_dictionary(self, skip_chars=None, letters=None): + """Generate a replacement dictionary to plug into a substitution matrix. + + This should look at an alignment, and be able to generate the number + of substitutions of different residues for each other in the + aligned object. + + Will then return a dictionary with this information:: + + {('A', 'C') : 10, ('C', 'A') : 12, ('G', 'C') : 15 ....} + + This also treats weighted sequences. The following example shows how + we calculate the replacement dictionary. Given the following + multiple sequence alignment:: + + GTATC 0.5 + AT--C 0.8 + CTGTC 1.0 + + For the first column we have:: + + ('A', 'G') : 0.5 * 0.8 = 0.4 + ('C', 'G') : 0.5 * 1.0 = 0.5 + ('A', 'C') : 0.8 * 1.0 = 0.8 + + We then continue this for all of the columns in the alignment, summing + the information for each substitution in each column, until we end + up with the replacement dictionary. + + Arguments: + - skip_chars - Not used; setting it to anything other than None + will raise a ValueError + - letters - An iterable (e.g. a string or list of characters to include. 
+ """ + if skip_chars is not None: + raise ValueError( + "argument skip_chars has been deprecated; instead, please use 'letters' to specify the characters you want to include" + ) + rep_dict = {(letter1, letter2): 0 for letter1 in letters for letter2 in letters} + + # iterate through each record + for rec_num1 in range(len(self.alignment)): + # iterate through each record from one beyond the current record + # to the end of the list of records + for rec_num2 in range(rec_num1 + 1, len(self.alignment)): + # for each pair of records, compare the sequences and add + # the pertinent info to the dictionary + self._pair_replacement( + self.alignment[rec_num1].seq, + self.alignment[rec_num2].seq, + self.alignment[rec_num1].annotations.get("weight", 1.0), + self.alignment[rec_num2].annotations.get("weight", 1.0), + rep_dict, + letters, + ) + + return rep_dict + + def _pair_replacement(self, seq1, seq2, weight1, weight2, dictionary, letters): + """Compare two sequences and generate info on the replacements seen (PRIVATE). + + Arguments: + - seq1, seq2 - The two sequences to compare. + - weight1, weight2 - The relative weights of seq1 and seq2. + - dictionary - The dictionary containing the starting replacement + info that we will modify. + - letters - A list of characters to include when calculating replacements. + + """ + # loop through each residue in the sequences + for residue1, residue2 in zip(seq1, seq2): + if residue1 in letters and residue2 in letters: + dictionary[(residue1, residue2)] += weight1 * weight2 + + def _get_all_letters(self): + """Return a string containing the expected letters in the alignment (PRIVATE).""" + set_letters = set() + for record in self.alignment: + set_letters.update(record.seq) + list_letters = sorted(set_letters) + all_letters = "".join(list_letters) + return all_letters + + def pos_specific_score_matrix(self, axis_seq=None, chars_to_ignore=None): + """Create a position specific score matrix object for the alignment. + + This creates a position specific score matrix (pssm) which is an + alternative method to look at a consensus sequence. + + Arguments: + - chars_to_ignore - A list of all characters not to include in + the pssm. + - axis_seq - An optional argument specifying the sequence to + put on the axis of the PSSM. This should be a Seq object. If nothing + is specified, the consensus sequence, calculated with default + parameters, will be used. + + Returns: + - A PSSM (position specific score matrix) object. 
+ + """ + # determine all of the letters we have to deal with + all_letters = self._get_all_letters() + assert all_letters + + if chars_to_ignore is None: + chars_to_ignore = [] + if not isinstance(chars_to_ignore, list): + raise TypeError("chars_to_ignore should be a list.") + + gap_char = "-" + chars_to_ignore.append(gap_char) + + for char in chars_to_ignore: + all_letters = all_letters.replace(char, "") + + if axis_seq: + left_seq = axis_seq + assert len(axis_seq) == self.alignment.get_alignment_length() + else: + left_seq = self.dumb_consensus() + + pssm_info = [] + # now start looping through all of the sequences and getting info + for residue_num in range(len(left_seq)): + score_dict = dict.fromkeys(all_letters, 0) + for record in self.alignment: + try: + this_residue = record.seq[residue_num] + # if we hit an index error we've run out of sequence and + # should not add new residues + except IndexError: + this_residue = None + + if this_residue and this_residue not in chars_to_ignore: + weight = record.annotations.get("weight", 1.0) + try: + score_dict[this_residue] += weight + except KeyError: + raise ValueError( + "Residue %s not found" % this_residue + ) from None + + pssm_info.append((left_seq[residue_num], score_dict)) + + return PSSM(pssm_info) + + def information_content( + self, + start=0, + end=None, + e_freq_table=None, + log_base=2, + chars_to_ignore=None, + pseudo_count=0, + ): + """Calculate the information content for each residue along an alignment. + + Arguments: + - start, end - The starting an ending points to calculate the + information content. These points should be relative to the first + sequence in the alignment, starting at zero (ie. even if the 'real' + first position in the seq is 203 in the initial sequence, for + the info content, we need to use zero). This defaults to the entire + length of the first sequence. + - e_freq_table - A dictionary specifying the expected frequencies + for each letter (e.g. {'G' : 0.4, 'C' : 0.4, 'T' : 0.1, 'A' : 0.1}). + Gap characters should not be included, since these should not have + expected frequencies. + - log_base - The base of the logarithm to use in calculating the + information content. This defaults to 2 so the info is in bits. + - chars_to_ignore - A listing of characters which should be ignored + in calculating the info content. Defaults to none. + + Returns: + - A number representing the info content for the specified region. + + Please see the Biopython manual for more information on how information + content is calculated. 
+ + """ + # if no end was specified, then we default to the end of the sequence + if end is None: + end = len(self.alignment[0].seq) + if chars_to_ignore is None: + chars_to_ignore = [] + + if start < 0 or end > len(self.alignment[0].seq): + raise ValueError( + "Start (%s) and end (%s) are not in the range %s to %s" + % (start, end, 0, len(self.alignment[0].seq)) + ) + # determine random expected frequencies, if necessary + random_expected = None + # determine all of the letters we have to deal with + all_letters = self._get_all_letters() + for char in chars_to_ignore: + all_letters = all_letters.replace(char, "") + + info_content = {} + for residue_num in range(start, end): + freq_dict = self._get_letter_freqs( + residue_num, + self.alignment, + all_letters, + chars_to_ignore, + pseudo_count, + e_freq_table, + random_expected, + ) + # print(freq_dict, end="") + column_score = self._get_column_info_content( + freq_dict, e_freq_table, log_base, random_expected + ) + info_content[residue_num] = column_score + # sum up the score + total_info = sum(info_content.values()) + # fill in the ic_vector member: holds IC for each column + # reset ic_vector to empty list at each call + self.ic_vector = [] + for (i, k) in enumerate(info_content): + self.ic_vector.append(info_content[i + start]) + return total_info + + def _get_letter_freqs( + self, + residue_num, + all_records, + letters, + to_ignore, + pseudo_count=0, + e_freq_table=None, + random_expected=None, + ): + """Determine the frequency of specific letters in the alignment (PRIVATE). + + Arguments: + - residue_num - The number of the column we are getting frequencies + from. + - all_records - All of the SeqRecords in the alignment. + - letters - The letters we are interested in getting the frequency + for. + - to_ignore - Letters we are specifically supposed to ignore. + - pseudo_count - Optional argument specifying the Pseudo count (k) + to add in order to prevent a frequency of 0 for a letter. + - e_freq_table - An optional argument specifying a dictionary with + the expected frequencies for each letter. + - random_expected - Optional argument that specify the frequency to use + when e_freq_table is not defined. + + This will calculate the frequencies of each of the specified letters + in the alignment at the given frequency, and return this as a + dictionary where the keys are the letters and the values are the + frequencies. Pseudo count can be added to prevent a null frequency + """ + freq_info = dict.fromkeys(letters, 0) + + total_count = 0 + + gap_char = "-" + + if pseudo_count < 0: + raise ValueError( + "Positive value required for pseudo_count, %s provided" % (pseudo_count) + ) + + # collect the count info into the dictionary for all the records + for record in all_records: + try: + if record.seq[residue_num] not in to_ignore: + weight = record.annotations.get("weight", 1.0) + freq_info[record.seq[residue_num]] += weight + total_count += weight + except KeyError: + raise ValueError( + "Residue %s not found in letters %s" + % (record.seq[residue_num], letters) + ) from None + + if e_freq_table: + # check if all the residus in freq_info are in e_freq_table + for key in freq_info: + if key != gap_char and key not in e_freq_table: + raise ValueError("%s not found in expected frequency table" % key) + + if total_count == 0: + # This column must be entirely ignored characters + for letter in freq_info: + assert freq_info[letter] == 0 + # TODO - Map this to NA or NaN? 
+ else: + # now convert the counts into frequencies + for letter in freq_info: + if pseudo_count and (random_expected or e_freq_table): + # use either the expected random freq or the + if e_freq_table: + ajust_freq = e_freq_table[letter] + else: + ajust_freq = random_expected + + ajusted_letter_count = freq_info[letter] + ajust_freq * pseudo_count + ajusted_total = total_count + pseudo_count + freq_info[letter] = ajusted_letter_count / ajusted_total + + else: + freq_info[letter] = freq_info[letter] / total_count + + return freq_info + + def _get_column_info_content( + self, obs_freq, e_freq_table, log_base, random_expected + ): + """Calculate the information content for a column (PRIVATE). + + Arguments: + - obs_freq - The frequencies observed for each letter in the column. + - e_freq_table - An optional argument specifying a dictionary with + the expected frequencies for each letter. + - log_base - The base of the logarithm to use in calculating the + info content. + + """ + gap_char = "-" + + if e_freq_table: + # check the expected freq information to make sure it is good + for key in obs_freq: + if key != gap_char and key not in e_freq_table: + raise ValueError( + f"Frequency table provided does not contain observed letter {key}" + ) + + total_info = 0.0 + + for letter in obs_freq: + inner_log = 0.0 + # if we have expected frequencies, modify the log value by them + # gap characters do not have expected frequencies, so they + # should just be the observed frequency. + if letter != gap_char: + if e_freq_table: + inner_log = obs_freq[letter] / e_freq_table[letter] + else: + inner_log = obs_freq[letter] / random_expected + # if the observed frequency is zero, we don't add any info to the + # total information content + if inner_log > 0: + letter_info = ( + obs_freq[letter] * math.log(inner_log) / math.log(log_base) + ) + total_info += letter_info + return total_info + + def get_column(self, col): + """Return column of alignment.""" + # TODO - Deprecate this and implement slicing? + return self.alignment[:, col] + + +class PSSM: + """Represent a position specific score matrix. + + This class is meant to make it easy to access the info within a PSSM + and also make it easy to print out the information in a nice table. + + Let's say you had an alignment like this:: + + GTATC + AT--C + CTGTC + + The position specific score matrix (when printed) looks like:: + + G A T C + G 1 1 0 1 + T 0 0 3 0 + A 1 1 0 0 + T 0 0 2 0 + C 0 0 0 3 + + You can access a single element of the PSSM using the following:: + + your_pssm[sequence_number][residue_count_name] + + For instance, to get the 'T' residue for the second element in the + above alignment you would need to do: + + your_pssm[1]['T'] + """ + + def __init__(self, pssm): + """Initialize with pssm data to represent. + + The pssm passed should be a list with the following structure: + + list[0] - The letter of the residue being represented (for instance, + from the example above, the first few list[0]s would be GTAT... + list[1] - A dictionary with the letter substitutions and counts. 
+ """ + self.pssm = pssm + + def __getitem__(self, pos): + return self.pssm[pos][1] + + def __str__(self): + out = " " + all_residues = sorted(self.pssm[0][1]) + + # first print out the top header + for res in all_residues: + out += " %s" % res + out += "\n" + + # for each item, write out the substitutions + for item in self.pssm: + out += "%s " % item[0] + for res in all_residues: + out += " %.1f" % item[1][res] + + out += "\n" + return out + + def get_residue(self, pos): + """Return the residue letter at the specified position.""" + return self.pssm[pos][0] + + +def print_info_content(summary_info, fout=None, rep_record=0): + """3 column output: position, aa in representative sequence, ic_vector value.""" + fout = fout or sys.stdout + if not summary_info.ic_vector: + summary_info.information_content() + rep_sequence = summary_info.alignment[rep_record].seq + for pos, ic in enumerate(summary_info.ic_vector): + fout.write("%d %s %.3f\n" % (pos, rep_sequence[pos], ic)) diff --git a/code/lib/Bio/Align/Applications/_ClustalOmega.py b/code/lib/Bio/Align/Applications/_ClustalOmega.py new file mode 100644 index 0000000..2181bc5 --- /dev/null +++ b/code/lib/Bio/Align/Applications/_ClustalOmega.py @@ -0,0 +1,269 @@ +# Copyright 2011 by Andreas Wilm. All rights reserved. +# Based on ClustalW wrapper copyright 2009 by Cymon J. Cox. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +"""Command line wrapper for the multiple alignment program Clustal Omega.""" + + +from Bio.Application import _Option, _Switch, AbstractCommandline + + +class ClustalOmegaCommandline(AbstractCommandline): + """Command line wrapper for clustal omega. + + http://www.clustal.org/omega + + Notes + ----- + Last checked against version: 1.2.0 + + References + ---------- + Sievers F, Wilm A, Dineen DG, Gibson TJ, Karplus K, Li W, Lopez R, + McWilliam H, Remmert M, Söding J, Thompson JD, Higgins DG (2011). + Fast, scalable generation of high-quality protein multiple + sequence alignments using Clustal Omega. + Molecular Systems Biology 7:539 https://doi.org/10.1038/msb.2011.75 + + Examples + -------- + >>> from Bio.Align.Applications import ClustalOmegaCommandline + >>> in_file = "unaligned.fasta" + >>> out_file = "aligned.fasta" + >>> clustalomega_cline = ClustalOmegaCommandline(infile=in_file, outfile=out_file, verbose=True, auto=True) + >>> print(clustalomega_cline) + clustalo -i unaligned.fasta -o aligned.fasta --auto -v + + You would typically run the command line with clustalomega_cline() or via + the Python subprocess module, as described in the Biopython tutorial. 
+ + """ + + def __init__(self, cmd="clustalo", **kwargs): + """Initialize the class.""" + # order parameters in the same order as clustalo --help + self.parameters = [ + # Sequence Input + _Option( + ["-i", "--in", "--infile", "infile"], + "Multiple sequence input file", + filename=True, + equate=False, + ), + _Option( + ["--hmm-in", "HMM input", "hmm_input"], + "HMM input files", + filename=True, + equate=False, + ), + _Switch(["--dealign", "dealign"], "Dealign input sequences"), + _Option( + ["--profile1", "--p1", "profile1"], + "Pre-aligned multiple sequence file (aligned columns will be kept fix).", + filename=True, + equate=False, + ), + _Option( + ["--profile2", "--p2", "profile2"], + "Pre-aligned multiple sequence file (aligned columns will be kept fix).", + filename=True, + equate=False, + ), + _Option( + ["-t", "--seqtype", "seqtype"], + "{Protein, RNA, DNA} Force a sequence type (default: auto).", + equate=False, + checker_function=lambda x: x + in ["protein", "rna", "dna", "Protein", "RNA", "DNA", "PROTEIN"], + ), + _Switch( + ["--is-profile", "isprofile"], + "disable check if profile, force profile (default no)", + ), + _Option( + ["--infmt", "infmt"], + """Forced sequence input file format (default: auto) + + Allowed values: a2m, fa[sta], clu[stal], msf, phy[lip], selex, st[ockholm], vie[nna] + """, + equate=False, + checker_function=lambda x: x + in [ + "a2m", + "fa", + "fasta", + "clu", + "clustal", + "msf", + "phy", + "phylip", + "selex", + "st", + "stockholm", + "vie", + "vienna", + ], + ), + # Clustering + _Option( + ["--distmat-in", "distmat_in"], + "Pairwise distance matrix input file (skips distance computation).", + filename=True, + equate=False, + ), + _Option( + ["--distmat-out", "distmat_out"], + "Pairwise distance matrix output file.", + filename=True, + equate=False, + ), + _Option( + ["--guidetree-in", "guidetree_in"], + "Guide tree input file (skips distance computation and guide-tree clustering step).", + filename=True, + equate=False, + ), + _Option( + ["--guidetree-out", "guidetree_out"], + "Guide tree output file.", + filename=True, + equate=False, + ), + _Switch( + ["--full", "distmat_full"], + "Use full distance matrix for guide-tree calculation (slow; mBed is default)", + ), + _Switch( + ["--full-iter", "distmat_full_iter"], + "Use full distance matrix for guide-tree calculation during iteration (mBed is default)", + ), + _Option( + ["--cluster-size", "clustersize"], + "soft maximum of sequences in sub-clusters", + checker_function=lambda x: isinstance(x, int), + ), + _Option( + ["--clustering-out", "clusteringout"], + "Clustering output file", + filename=True, + ), + _Switch( + ["--use-kimura", "usekimura"], + "use Kimura distance correction for aligned sequences (default no)", + ), + _Switch( + ["--percent-id", "percentid"], + "convert distances into percent identities (default no)", + ), + # Alignment Output + _Option( + ["-o", "--out", "--outfile", "outfile"], + "Multiple sequence alignment output file (default: stdout).", + filename=True, + equate=False, + ), + _Option( + ["--outfmt", "outfmt"], + "MSA output file format:" + " a2m=fa[sta],clu[stal],msf,phy[lip],selex,st[ockholm],vie[nna]" + " (default: fasta).", + equate=False, + checker_function=lambda x: x + in [ + "a2m", + "fa", + "fasta", + "clu", + "clustal", + "msf", + "phy", + "phylip", + "selex", + "st", + "stockholm", + "vie", + "vienna", + ], + ), + _Switch( + ["--residuenumber", "--resno", "residuenumber"], + "in Clustal format print residue numbers (default no)", + ), + _Option( + 
["--wrap", "wrap"], + "number of residues before line-wrap in output", + checker_function=lambda x: isinstance(x, int), + ), + _Option( + ["--output-order", "outputorder"], + "MSA output order like in input/guide-tree", + checker_function=lambda x: x in ["input-order", "tree-order"], + ), + # Iteration + _Option( + ["--iterations", "--iter", "iterations"], + "Number of (combined guide-tree/HMM) iterations", + equate=False, + checker_function=lambda x: isinstance(x, int), + ), + _Option( + ["--max-guidetree-iterations", "max_guidetree_iterations"], + "Maximum number of guidetree iterations", + equate=False, + checker_function=lambda x: isinstance(x, int), + ), + _Option( + ["--max-hmm-iterations", "max_hmm_iterations"], + "Maximum number of HMM iterations", + equate=False, + checker_function=lambda x: isinstance(x, int), + ), + # Limits (will exit early, if exceeded): + _Option( + ["--maxnumseq", "maxnumseq"], + "Maximum allowed number of sequences", + equate=False, + checker_function=lambda x: isinstance(x, int), + ), + _Option( + ["--maxseqlen", "maxseqlen"], + "Maximum allowed sequence length", + equate=False, + checker_function=lambda x: isinstance(x, int), + ), + # Miscellaneous: + _Switch( + ["--auto", "auto"], + "Set options automatically (might overwrite some of your options)", + ), + _Option( + ["--threads", "threads"], + "Number of processors to use", + equate=False, + checker_function=lambda x: isinstance(x, int), + ), + _Option( + ["-l", "--log", "log"], + "Log all non-essential output to this file.", + filename=True, + equate=False, + ), + _Switch(["-h", "--help", "help"], "Print help and exit."), + _Switch(["-v", "--verbose", "verbose"], "Verbose output"), + _Switch(["--version", "version"], "Print version information and exit"), + _Switch( + ["--long-version", "long_version"], + "Print long version information and exit", + ), + _Switch(["--force", "force"], "Force file overwriting."), + ] + AbstractCommandline.__init__(self, cmd, **kwargs) + + +if __name__ == "__main__": + from Bio._utils import run_doctest + + run_doctest() diff --git a/code/lib/Bio/Align/Applications/_Clustalw.py b/code/lib/Bio/Align/Applications/_Clustalw.py new file mode 100644 index 0000000..777e411 --- /dev/null +++ b/code/lib/Bio/Align/Applications/_Clustalw.py @@ -0,0 +1,486 @@ +# Copyright 2009 by Cymon J. Cox. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +"""Command line wrapper for the multiple alignment program Clustal W.""" + + +import os +from Bio.Application import _Option, _Switch, AbstractCommandline + + +class ClustalwCommandline(AbstractCommandline): + """Command line wrapper for clustalw (version one or two). + + http://www.clustal.org/ + + Notes + ----- + Last checked against versions: 1.83 and 2.1 + + References + ---------- + Larkin MA, Blackshields G, Brown NP, Chenna R, McGettigan PA, + McWilliam H, Valentin F, Wallace IM, Wilm A, Lopez R, Thompson JD, + Gibson TJ, Higgins DG. (2007). Clustal W and Clustal X version 2.0. + Bioinformatics, 23, 2947-2948. 
+ + Examples + -------- + >>> from Bio.Align.Applications import ClustalwCommandline + >>> in_file = "unaligned.fasta" + >>> clustalw_cline = ClustalwCommandline("clustalw2", infile=in_file) + >>> print(clustalw_cline) + clustalw2 -infile=unaligned.fasta + + You would typically run the command line with clustalw_cline() or via + the Python subprocess module, as described in the Biopython tutorial. + + """ + + # TODO - Should we default to cmd="clustalw2" now? + def __init__(self, cmd="clustalw", **kwargs): + """Initialize the class.""" + self.parameters = [ + _Option( + ["-infile", "-INFILE", "INFILE", "infile"], + "Input sequences.", + filename=True, + ), + _Option( + ["-profile1", "-PROFILE1", "PROFILE1", "profile1"], + "Profiles (old alignment).", + filename=True, + ), + _Option( + ["-profile2", "-PROFILE2", "PROFILE2", "profile2"], + "Profiles (old alignment).", + filename=True, + ), + # ################# VERBS (do things) ############################# + _Switch( + ["-options", "-OPTIONS", "OPTIONS", "options"], + "List the command line parameters", + ), + _Switch( + ["-help", "-HELP", "HELP", "help"], "Outline the command line params." + ), + _Switch( + ["-check", "-CHECK", "CHECK", "check"], + "Outline the command line params.", + ), + _Switch( + ["-fullhelp", "-FULLHELP", "FULLHELP", "fullhelp"], + "Output full help content.", + ), + _Switch( + ["-align", "-ALIGN", "ALIGN", "align"], "Do full multiple alignment." + ), + _Switch(["-tree", "-TREE", "TREE", "tree"], "Calculate NJ tree."), + _Switch( + ["-pim", "-PIM", "PIM", "pim"], + "Output percent identity matrix (while calculating the tree).", + ), + _Option( + ["-bootstrap", "-BOOTSTRAP", "BOOTSTRAP", "bootstrap"], + "Bootstrap a NJ tree (n= number of bootstraps; def. = 1000).", + checker_function=lambda x: isinstance(x, int), + ), + _Switch( + ["-convert", "-CONVERT", "CONVERT", "convert"], + "Output the input sequences in a different file format.", + ), + # #################### PARAMETERS (set things) ######################### + # ***General settings:**** + # Makes no sense in biopython + # _Option(["-interactive", "-INTERACTIVE", "INTERACTIVE", "interactive"], + # [], + # lambda x: 0, # Does not take value + # False, + # "read command line, then enter normal interactive menus", + # False), + _Switch( + ["-quicktree", "-QUICKTREE", "QUICKTREE", "quicktree"], + "Use FAST algorithm for the alignment guide tree", + ), + _Option( + ["-type", "-TYPE", "TYPE", "type"], + "PROTEIN or DNA sequences", + checker_function=lambda x: x in ["PROTEIN", "DNA", "protein", "dna"], + ), + _Switch( + ["-negative", "-NEGATIVE", "NEGATIVE", "negative"], + "Protein alignment with negative values in matrix", + ), + _Option( + ["-outfile", "-OUTFILE", "OUTFILE", "outfile"], + "Output sequence alignment file name", + filename=True, + ), + _Option( + ["-output", "-OUTPUT", "OUTPUT", "output"], + "Output format: CLUSTAL(default), GCG, GDE, PHYLIP, PIR, NEXUS and FASTA", + checker_function=lambda x: x + in [ + "CLUSTAL", + "GCG", + "GDE", + "PHYLIP", + "PIR", + "NEXUS", + "FASTA", + "clustal", + "gcg", + "gde", + "phylip", + "pir", + "nexus", + "fasta", + ], + ), + _Option( + ["-outorder", "-OUTORDER", "OUTORDER", "outorder"], + "Output taxon order: INPUT or ALIGNED", + checker_function=lambda x: x + in ["INPUT", "input", "ALIGNED", "aligned"], + ), + _Option( + ["-case", "-CASE", "CASE", "case"], + "LOWER or UPPER (for GDE output only)", + checker_function=lambda x: x in ["UPPER", "upper", "LOWER", "lower"], + ), + _Option( + ["-seqnos", "-SEQNOS", 
"SEQNOS", "seqnos"], + "OFF or ON (for Clustal output only)", + checker_function=lambda x: x in ["ON", "on", "OFF", "off"], + ), + _Option( + ["-seqno_range", "-SEQNO_RANGE", "SEQNO_RANGE", "seqno_range"], + "OFF or ON (NEW- for all output formats)", + checker_function=lambda x: x in ["ON", "on", "OFF", "off"], + ), + _Option( + ["-range", "-RANGE", "RANGE", "range"], + "Sequence range to write starting m to m+n. " + "Input as string eg. '24,200'", + ), + _Option( + ["-maxseqlen", "-MAXSEQLEN", "MAXSEQLEN", "maxseqlen"], + "Maximum allowed input sequence length", + checker_function=lambda x: isinstance(x, int), + ), + _Switch( + ["-quiet", "-QUIET", "QUIET", "quiet"], + "Reduce console output to minimum", + ), + _Option( + ["-stats", "-STATS", "STATS", "stats"], + "Log some alignment statistics to file", + filename=True, + ), + # ***Fast Pairwise Alignments:*** + _Option( + ["-ktuple", "-KTUPLE", "KTUPLE", "ktuple"], + "Word size", + checker_function=lambda x: (isinstance(x, int) or isinstance(x, float)), + ), + _Option( + ["-topdiags", "-TOPDIAGS", "TOPDIAGS", "topdiags"], + "Number of best diags.", + checker_function=lambda x: (isinstance(x, int) or isinstance(x, float)), + ), + _Option( + ["-window", "-WINDOW", "WINDOW", "window"], + "Window around best diags.", + checker_function=lambda x: (isinstance(x, int) or isinstance(x, float)), + ), + _Option( + ["-pairgap", "-PAIRGAP", "PAIRGAP", "pairgap"], + "Gap penalty", + checker_function=lambda x: (isinstance(x, int) or isinstance(x, float)), + ), + _Option( + ["-score", "-SCORE", "SCORE", "score"], + "Either: PERCENT or ABSOLUTE", + checker_function=lambda x: x + in ["percent", "PERCENT", "absolute", "ABSOLUTE"], + ), + # ***Slow Pairwise Alignments:*** + _Option( + ["-pwmatrix", "-PWMATRIX", "PWMATRIX", "pwmatrix"], + "Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename", + checker_function=lambda x: ( + x + in [ + "BLOSUM", + "PAM", + "GONNET", + "ID", + "blosum", + "pam", + "gonnet", + "id", + ] + or os.path.exists(x) + ), + filename=True, + ), + _Option( + ["-pwdnamatrix", "-PWDNAMATRIX", "PWDNAMATRIX", "pwdnamatrix"], + "DNA weight matrix=IUB, CLUSTALW or filename", + checker_function=lambda x: ( + x in ["IUB", "CLUSTALW", "iub", "clustalw"] or os.path.exists(x) + ), + filename=True, + ), + _Option( + ["-pwgapopen", "-PWGAPOPEN", "PWGAPOPEN", "pwgapopen"], + "Gap opening penalty", + checker_function=lambda x: (isinstance(x, int) or isinstance(x, float)), + ), + _Option( + ["-pwgapext", "-PWGAPEXT", "PWGAPEXT", "pwgapext"], + "Gap extension penalty", + checker_function=lambda x: (isinstance(x, int) or isinstance(x, float)), + ), + # ***Multiple Alignments:*** + _Option( + ["-newtree", "-NEWTREE", "NEWTREE", "newtree"], + "Output file name for newly created guide tree", + filename=True, + ), + _Option( + ["-usetree", "-USETREE", "USETREE", "usetree"], + "File name of guide tree", + checker_function=lambda x: os.path.exists, + filename=True, + ), + _Option( + ["-matrix", "-MATRIX", "MATRIX", "matrix"], + "Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename", + checker_function=lambda x: ( + x + in [ + "BLOSUM", + "PAM", + "GONNET", + "ID", + "blosum", + "pam", + "gonnet", + "id", + ] + or os.path.exists(x) + ), + filename=True, + ), + _Option( + ["-dnamatrix", "-DNAMATRIX", "DNAMATRIX", "dnamatrix"], + "DNA weight matrix=IUB, CLUSTALW or filename", + checker_function=lambda x: ( + x in ["IUB", "CLUSTALW", "iub", "clustalw"] or os.path.exists(x) + ), + filename=True, + ), + _Option( + ["-gapopen", "-GAPOPEN", "GAPOPEN", 
"gapopen"], + "Gap opening penalty", + checker_function=lambda x: (isinstance(x, int) or isinstance(x, float)), + ), + _Option( + ["-gapext", "-GAPEXT", "GAPEXT", "gapext"], + "Gap extension penalty", + checker_function=lambda x: (isinstance(x, int) or isinstance(x, float)), + ), + _Switch( + ["-endgaps", "-ENDGAPS", "ENDGAPS", "endgaps"], + "No end gap separation pen.", + ), + _Option( + ["-gapdist", "-GAPDIST", "GAPDIST", "gapdist"], + "Gap separation pen. range", + checker_function=lambda x: (isinstance(x, int) or isinstance(x, float)), + ), + _Switch( + ["-nopgap", "-NOPGAP", "NOPGAP", "nopgap"], "Residue-specific gaps off" + ), + _Switch(["-nohgap", "-NOHGAP", "NOHGAP", "nohgap"], "Hydrophilic gaps off"), + _Switch( + ["-hgapresidues", "-HGAPRESIDUES", "HGAPRESIDUES", "hgapresidues"], + "List hydrophilic res.", + ), + _Option( + ["-maxdiv", "-MAXDIV", "MAXDIV", "maxdiv"], + "% ident. for delay", + checker_function=lambda x: (isinstance(x, int) or isinstance(x, float)), + ), + # Already handled in General Settings section, but appears a second + # time under Multiple Alignments in the help + # _Option(["-type", "-TYPE", "TYPE", "type"], + # "PROTEIN or DNA", + # checker_function=lambda x: x in ["PROTEIN", "DNA", + # "protein", "dna"]), + _Option( + ["-transweight", "-TRANSWEIGHT", "TRANSWEIGHT", "transweight"], + "Transitions weighting", + checker_function=lambda x: (isinstance(x, int) or isinstance(x, float)), + ), + _Option( + ["-iteration", "-ITERATION", "ITERATION", "iteration"], + "NONE or TREE or ALIGNMENT", + checker_function=lambda x: x + in ["NONE", "TREE", "ALIGNMENT", "none", "tree", "alignment"], + ), + _Option( + ["-numiter", "-NUMITER", "NUMITER", "numiter"], + "maximum number of iterations to perform", + checker_function=lambda x: isinstance(x, int), + ), + _Switch( + ["-noweights", "-NOWEIGHTS", "NOWEIGHTS", "noweights"], + "Disable sequence weighting", + ), + # ***Profile Alignments:*** + _Switch( + ["-profile", "-PROFILE", "PROFILE", "profile"], + "Merge two alignments by profile alignment", + ), + _Option( + ["-newtree1", "-NEWTREE1", "NEWTREE1", "newtree1"], + "Output file name for new guide tree of profile1", + filename=True, + ), + _Option( + ["-newtree2", "-NEWTREE2", "NEWTREE2", "newtree2"], + "Output file for new guide tree of profile2", + filename=True, + ), + _Option( + ["-usetree1", "-USETREE1", "USETREE1", "usetree1"], + "File name of guide tree for profile1", + checker_function=lambda x: os.path.exists, + filename=True, + ), + _Option( + ["-usetree2", "-USETREE2", "USETREE2", "usetree2"], + "File name of guide tree for profile2", + checker_function=lambda x: os.path.exists, + filename=True, + ), + # ***Sequence to Profile Alignments:*** + _Switch( + ["-sequences", "-SEQUENCES", "SEQUENCES", "sequences"], + "Sequentially add profile2 sequences to profile1 alignment", + ), + # These are already handled in the Multiple Alignments section, + # but appear a second time here in the help. 
+ # _Option(["-newtree", "-NEWTREE", "NEWTREE", "newtree"], + # "File for new guide tree", + # filename=True), + # _Option(["-usetree", "-USETREE", "USETREE", "usetree"], + # "File for old guide tree", + # checker_function=lambda x: os.path.exists, + # filename=True), + # ***Structure Alignments:*** + _Switch( + ["-nosecstr1", "-NOSECSTR1", "NOSECSTR1", "nosecstr1"], + "Do not use secondary structure-gap penalty mask for profile 1", + ), + _Switch( + ["-nosecstr2", "-NOSECSTR2", "NOSECSTR2", "nosecstr2"], + "Do not use secondary structure-gap penalty mask for profile 2", + ), + _Option( + ["-secstrout", "-SECSTROUT", "SECSTROUT", "secstrout"], + "STRUCTURE or MASK or BOTH or NONE output in alignment file", + checker_function=lambda x: x + in [ + "STRUCTURE", + "MASK", + "BOTH", + "NONE", + "structure", + "mask", + "both", + "none", + ], + ), + _Option( + ["-helixgap", "-HELIXGAP", "HELIXGAP", "helixgap"], + "Gap penalty for helix core residues", + checker_function=lambda x: (isinstance(x, int) or isinstance(x, float)), + ), + _Option( + ["-strandgap", "-STRANDGAP", "STRANDGAP", "strandgap"], + "gap penalty for strand core residues", + checker_function=lambda x: (isinstance(x, int) or isinstance(x, float)), + ), + _Option( + ["-loopgap", "-LOOPGAP", "LOOPGAP", "loopgap"], + "Gap penalty for loop regions", + checker_function=lambda x: (isinstance(x, int) or isinstance(x, float)), + ), + _Option( + ["-terminalgap", "-TERMINALGAP", "TERMINALGAP", "terminalgap"], + "Gap penalty for structure termini", + checker_function=lambda x: (isinstance(x, int) or isinstance(x, float)), + ), + _Option( + ["-helixendin", "-HELIXENDIN", "HELIXENDIN", "helixendin"], + "Number of residues inside helix to be treated as terminal", + checker_function=lambda x: isinstance(x, int), + ), + _Option( + ["-helixendout", "-HELIXENDOUT", "HELIXENDOUT", "helixendout"], + "Number of residues outside helix to be treated as terminal", + checker_function=lambda x: isinstance(x, int), + ), + _Option( + ["-strandendin", "-STRANDENDIN", "STRANDENDIN", "strandendin"], + "Number of residues inside strand to be treated as terminal", + checker_function=lambda x: isinstance(x, int), + ), + _Option( + ["-strandendout", "-STRANDENDOUT", "STRANDENDOUT", "strandendout"], + "Number of residues outside strand to be treated as terminal", + checker_function=lambda x: isinstance(x, int), + ), + # ***Trees:*** + _Option( + ["-outputtree", "-OUTPUTTREE", "OUTPUTTREE", "outputtree"], + "nj OR phylip OR dist OR nexus", + checker_function=lambda x: x + in ["NJ", "PHYLIP", "DIST", "NEXUS", "nj", "phylip", "dist", "nexus"], + ), + _Option( + ["-seed", "-SEED", "SEED", "seed"], + "Seed number for bootstraps.", + checker_function=lambda x: isinstance(x, int), + ), + _Switch( + ["-kimura", "-KIMURA", "KIMURA", "kimura"], "Use Kimura's correction." 
+ ), + _Switch( + ["-tossgaps", "-TOSSGAPS", "TOSSGAPS", "tossgaps"], + "Ignore positions with gaps.", + ), + _Option( + ["-bootlabels", "-BOOTLABELS", "BOOTLABELS", "bootlabels"], + "Node OR branch position of bootstrap values in tree display", + checker_function=lambda x: x in ["NODE", "BRANCH", "node", "branch"], + ), + _Option( + ["-clustering", "-CLUSTERING", "CLUSTERING", "clustering"], + "NJ or UPGMA", + checker_function=lambda x: x in ["NJ", "UPGMA", "nj", "upgma"], + ), + ] + AbstractCommandline.__init__(self, cmd, **kwargs) + + +if __name__ == "__main__": + from Bio._utils import run_doctest + + run_doctest() diff --git a/code/lib/Bio/Align/Applications/_Dialign.py b/code/lib/Bio/Align/Applications/_Dialign.py new file mode 100644 index 0000000..52be1b1 --- /dev/null +++ b/code/lib/Bio/Align/Applications/_Dialign.py @@ -0,0 +1,243 @@ +# Copyright 2009 by Cymon J. Cox. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +"""Command line wrapper for the multiple alignment program DIALIGN2-2.""" + +from Bio.Application import _Option, _Argument, _Switch, AbstractCommandline + + +class DialignCommandline(AbstractCommandline): + """Command line wrapper for the multiple alignment program DIALIGN2-2. + + http://bibiserv.techfak.uni-bielefeld.de/dialign/welcome.html + + Notes + ----- + Last checked against version: 2.2 + + References + ---------- + B. Morgenstern (2004). DIALIGN: Multiple DNA and Protein Sequence + Alignment at BiBiServ. Nucleic Acids Research 32, W33-W36. + + Examples + -------- + To align a FASTA file (unaligned.fasta) with the output files names + aligned.* including a FASTA output file (aligned.fa), use: + + >>> from Bio.Align.Applications import DialignCommandline + >>> dialign_cline = DialignCommandline(input="unaligned.fasta", + ... fn="aligned", fa=True) + >>> print(dialign_cline) + dialign2-2 -fa -fn aligned unaligned.fasta + + You would typically run the command line with dialign_cline() or via + the Python subprocess module, as described in the Biopython tutorial. + + """ + + def __init__(self, cmd="dialign2-2", **kwargs): + """Initialize the class.""" + self.program_name = cmd + self.parameters = [ + _Switch( + ["-afc", "afc"], + r"Creates additional output file '\*.afc' " + "containing data of all fragments considered " + "for alignment WARNING: this file can be HUGE !", + ), + _Switch( + ["-afc_v", "afc_v"], + "Like '-afc' but verbose: fragments are explicitly " + "printed. WARNING: this file can be EVEN BIGGER !", + ), + _Switch( + ["-anc", "anc"], + "Anchored alignment. Requires a file .anc " + "containing anchor points.", + ), + _Switch( + ["-cs", "cs"], + "If segments are translated, not only the 'Watson " + "strand' but also the 'Crick strand' is looked at.", + ), + _Switch(["-cw", "cw"], "Additional output file in CLUSTAL W format."), + _Switch( + ["-ds", "ds"], + "'dna alignment speed up' - non-translated nucleic acid " + "fragments are taken into account only if they start " + "with at least two matches. 
Speeds up DNA alignment at "
+                "the expense of sensitivity.",
+            ),
+            _Switch(["-fa", "fa"], "Additional output file in FASTA format."),
+            _Switch(
+                ["-ff", "ff"],
+                r"Creates file \*.frg containing information about all "
+                "fragments that are part of the respective optimal "
+                "pairwise alignments plus information about "
+                "consistency in the multiple alignment",
+            ),
+            _Option(
+                ["-fn", "fn"],
+                "Output files are named <out_file>.<suffix>.",
+                equate=False,
+            ),
+            _Switch(
+                ["-fop", "fop"],
+                r"Creates file \*.fop containing coordinates of all "
+                "fragments that are part of the respective pairwise alignments.",
+            ),
+            _Switch(
+                ["-fsm", "fsm"],
+                r"Creates file \*.fsm containing coordinates of all "
+                "fragments that are part of the final alignment",
+            ),
+            _Switch(
+                ["-iw", "iw"],
+                "Overlap weights switched off (by default, overlap "
+                "weights are used if up to 35 sequences are aligned). "
+                "This option speeds up the alignment but may lead "
+                "to reduced alignment quality.",
+            ),
+            _Switch(
+                ["-lgs", "lgs"],
+                "'long genomic sequences' - combines the following "
+                "options: -ma, -thr 2, -lmax 30, -smin 8, -nta, -ff, "
+                "-fop, -ff, -cs, -ds, -pst ",
+            ),
+            _Switch(
+                ["-lgs_t", "lgs_t"],
+                "Like '-lgs' but with all segment pairs assessed "
+                "at the peptide level (rather than 'mixed alignments' "
+                "as with the '-lgs' option). Therefore faster than "
+                "-lgs but not very sensitive for non-coding regions.",
+            ),
+            _Option(
+                ["-lmax", "lmax"],
+                "Maximum fragment length = x (default: x = 40 or "
+                "x = 120 for 'translated' fragments). Shorter x "
+                "speeds up the program but may affect alignment quality.",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Switch(
+                ["-lo", "lo"],
+                r"(Long Output) Additional file \*.log with information "
+                "about fragments selected for pairwise alignment and "
+                "about consistency in multi-alignment procedure.",
+            ),
+            _Switch(
+                ["-ma", "ma"],
+                "'mixed alignments' consisting of P-fragments and "
+                "N-fragments if nucleic acid sequences are aligned.",
+            ),
+            _Switch(
+                ["-mask", "mask"],
+                "Residues not belonging to selected fragments are "
+                r"replaced by '\*' characters in output alignment "
+                "(rather than being printed in lower-case characters)",
+            ),
+            _Switch(
+                ["-mat", "mat"],
+                r"Creates file \*mat with substitution counts derived "
+                "from the fragments that have been selected for alignment.",
+            ),
+            _Switch(
+                ["-mat_thr", "mat_thr"],
+                "Like '-mat' but only fragments with weight score "
+                "> t are considered",
+            ),
+            _Switch(
+                ["-max_link", "max_link"],
+                "'maximum linkage' clustering used to construct "
+                "sequence tree (instead of UPGMA).",
+            ),
+            _Switch(["-min_link", "min_link"], "'minimum linkage' clustering used."),
+            _Option(["-mot", "mot"], "'motif' option.", equate=False),
+            _Switch(["-msf", "msf"], "Separate output file in MSF format."),
+            _Switch(
+                ["-n", "n"],
+                "Input sequences are nucleic acid sequences. "
+                "No translation of fragments.",
+            ),
+            _Switch(
+                ["-nt", "nt"],
+                "Input sequences are nucleic acid sequences and "
+                "'nucleic acid segments' are translated to 'peptide "
+                "segments'.",
+            ),
+            _Switch(
+                ["-nta", "nta"],
+                "'no textual alignment' - textual alignment suppressed. "
+                "This option makes sense if other output files are of "
+                "interest -- e.g.
the fragment files created with -ff, " + "-fop, -fsm or -lo.", + ), + _Switch( + ["-o", "o"], + "Fast version, resulting alignments may be slightly different.", + ), + _Switch( + ["-ow", "ow"], + "Overlap weights enforced (By default, overlap weights " + "are used only if up to 35 sequences are aligned since " + "calculating overlap weights is time consuming).", + ), + _Switch( + ["-pst", "pst"], + r"'print status'. Creates and updates a file \*.sta with " + "information about the current status of the program " + "run. This option is recommended if large data sets " + "are aligned since it allows the user to estimate the " + "remaining running time.", + ), + _Switch( + ["-smin", "smin"], + "Minimum similarity value for first residue pair " + "(or codon pair) in fragments. Speeds up protein " + "alignment or alignment of translated DNA fragments " + "at the expense of sensitivity.", + ), + _Option( + ["-stars", "stars"], + r"Maximum number of '\*' characters indicating degree " + "of local similarity among sequences. By default, no " + "stars are used but numbers between 0 and 9, instead.", + checker_function=lambda x: x in range(0, 10), + equate=False, + ), + _Switch(["-stdo", "stdo"], "Results written to standard output."), + _Switch( + ["-ta", "ta"], + "Standard textual alignment printed (overrides " + "suppression of textual alignments in special " + "options, e.g. -lgs)", + ), + _Option( + ["-thr", "thr"], + "Threshold T = x.", + checker_function=lambda x: isinstance(x, int), + equate=False, + ), + _Switch( + ["-xfr", "xfr"], + "'exclude fragments' - list of fragments can be " + "specified that are NOT considered for pairwise alignment", + ), + _Argument( + ["input"], + "Input file name. Must be FASTA format", + filename=True, + is_required=True, + ), + ] + AbstractCommandline.__init__(self, cmd, **kwargs) + + +if __name__ == "__main__": + from Bio._utils import run_doctest + + run_doctest() diff --git a/code/lib/Bio/Align/Applications/_MSAProbs.py b/code/lib/Bio/Align/Applications/_MSAProbs.py new file mode 100644 index 0000000..74b26a1 --- /dev/null +++ b/code/lib/Bio/Align/Applications/_MSAProbs.py @@ -0,0 +1,89 @@ +# Copyright 2013 by Christian Brueffer. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +"""Command line wrapper for the multiple sequence alignment program MSAProbs.""" + +from Bio.Application import _Argument, _Option, _Switch, AbstractCommandline + + +class MSAProbsCommandline(AbstractCommandline): + """Command line wrapper for MSAProbs. + + http://msaprobs.sourceforge.net + + Notes + ----- + Last checked against version: 0.9.7 + + References + ---------- + Yongchao Liu, Bertil Schmidt, Douglas L. Maskell: "MSAProbs: multiple + sequence alignment based on pair hidden Markov models and partition + function posterior probabilities". Bioinformatics, 2010, 26(16): 1958 -1964 + + Examples + -------- + >>> from Bio.Align.Applications import MSAProbsCommandline + >>> in_file = "unaligned.fasta" + >>> out_file = "aligned.cla" + >>> cline = MSAProbsCommandline(infile=in_file, outfile=out_file, clustalw=True) + >>> print(cline) + msaprobs -o aligned.cla -clustalw unaligned.fasta + + You would typically run the command line with cline() or via + the Python subprocess module, as described in the Biopython tutorial. 
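+
+    As a minimal sketch (assuming msaprobs is installed and on your PATH,
+    and reusing the placeholder file names from the example above)::
+
+        stdout, stderr = cline()
+        from Bio import AlignIO
+        align = AlignIO.read(out_file, "clustal")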
+ + """ + + def __init__(self, cmd="msaprobs", **kwargs): + """Initialize the class.""" + # order of parameters is the same as in msaprobs -help + self.parameters = [ + _Option( + ["-o", "--outfile", "outfile"], + "specify the output file name (STDOUT by default)", + filename=True, + equate=False, + ), + _Option( + ["-num_threads", "numthreads"], + "specify the number of threads used, and otherwise detect automatically", + checker_function=lambda x: isinstance(x, int), + ), + _Switch( + ["-clustalw", "clustalw"], + "use CLUSTALW output format instead of FASTA format", + ), + _Option( + ["-c", "consistency"], + "use 0 <= REPS <= 5 (default: 2) passes of consistency transformation", + checker_function=lambda x: isinstance(x, int) and 0 <= x <= 5, + ), + _Option( + ["-ir", "--iterative-refinement", "iterative_refinement"], + "use 0 <= REPS <= 1000 (default: 10) passes of iterative-refinement", + checker_function=lambda x: isinstance(x, int) and 0 <= x <= 1000, + ), + _Switch(["-v", "verbose"], "report progress while aligning (default: off)"), + _Option( + ["-annot", "annot"], + "write annotation for multiple alignment to FILENAME", + filename=True, + ), + _Switch( + ["-a", "--alignment-order", "alignment_order"], + "print sequences in alignment order rather than input order (default: off)", + ), + _Option(["-version", "version"], "print out version of MSAPROBS"), + _Argument(["infile"], "Multiple sequence input file", filename=True), + ] + AbstractCommandline.__init__(self, cmd, **kwargs) + + +if __name__ == "__main__": + from Bio._utils import run_doctest + + run_doctest() diff --git a/code/lib/Bio/Align/Applications/_Mafft.py b/code/lib/Bio/Align/Applications/_Mafft.py new file mode 100644 index 0000000..4a0b901 --- /dev/null +++ b/code/lib/Bio/Align/Applications/_Mafft.py @@ -0,0 +1,435 @@ +# Copyright 2009 by Cymon J. Cox. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +"""Command line wrapper for the multiple alignment programme MAFFT.""" + + +from Bio.Application import _Option, _Switch, _Argument, AbstractCommandline + + +class MafftCommandline(AbstractCommandline): + """Command line wrapper for the multiple alignment program MAFFT. + + http://align.bmr.kyushu-u.ac.jp/mafft/software/ + + Notes + ----- + Last checked against version: MAFFT v6.717b (2009/12/03) + + References + ---------- + Katoh, Toh (BMC Bioinformatics 9:212, 2008) Improved accuracy of + multiple ncRNA alignment by incorporating structural information into + a MAFFT-based framework (describes RNA structural alignment methods) + + Katoh, Toh (Briefings in Bioinformatics 9:286-298, 2008) Recent + developments in the MAFFT multiple sequence alignment program + (outlines version 6) + + Katoh, Toh (Bioinformatics 23:372-374, 2007) Errata PartTree: an + algorithm to build an approximate tree from a large number of + unaligned sequences (describes the PartTree algorithm) + + Katoh, Kuma, Toh, Miyata (Nucleic Acids Res. 33:511-518, 2005) MAFFT + version 5: improvement in accuracy of multiple sequence alignment + (describes [ancestral versions of] the G-INS-i, L-INS-i and E-INS-i + strategies) + + Katoh, Misawa, Kuma, Miyata (Nucleic Acids Res. 
30:3059-3066, 2002) + + Examples + -------- + >>> from Bio.Align.Applications import MafftCommandline + >>> mafft_exe = "/opt/local/mafft" + >>> in_file = "../Doc/examples/opuntia.fasta" + >>> mafft_cline = MafftCommandline(mafft_exe, input=in_file) + >>> print(mafft_cline) + /opt/local/mafft ../Doc/examples/opuntia.fasta + + If the mafft binary is on the path (typically the case on a Unix style + operating system) then you don't need to supply the executable location: + + >>> from Bio.Align.Applications import MafftCommandline + >>> in_file = "../Doc/examples/opuntia.fasta" + >>> mafft_cline = MafftCommandline(input=in_file) + >>> print(mafft_cline) + mafft ../Doc/examples/opuntia.fasta + + You would typically run the command line with mafft_cline() or via + the Python subprocess module, as described in the Biopython tutorial. + + Note that MAFFT will write the alignment to stdout, which you may + want to save to a file and then parse, e.g.:: + + stdout, stderr = mafft_cline() + with open("aligned.fasta", "w") as handle: + handle.write(stdout) + from Bio import AlignIO + align = AlignIO.read("aligned.fasta", "fasta") + + Alternatively, to parse the output with AlignIO directly you can + use StringIO to turn the string into a handle:: + + stdout, stderr = mafft_cline() + from io import StringIO + from Bio import AlignIO + align = AlignIO.read(StringIO(stdout), "fasta") + + """ + + def __init__(self, cmd="mafft", **kwargs): + """Initialize the class.""" + BLOSUM_MATRICES = ["30", "45", "62", "80"] + self.parameters = [ + # **** Algorithm **** + # Automatically selects an appropriate strategy from L-INS-i, FFT-NS- + # i and FFT-NS-2, according to data size. Default: off (always FFT-NS-2) + _Switch(["--auto", "auto"], "Automatically select strategy. Default off."), + # Distance is calculated based on the number of shared 6mers. Default: on + _Switch( + ["--6merpair", "6merpair", "sixmerpair"], + "Distance is calculated based on the number of shared " + "6mers. Default: on", + ), + # All pairwise alignments are computed with the Needleman-Wunsch + # algorithm. More accurate but slower than --6merpair. Suitable for a + # set of globally alignable sequences. Applicable to up to ~200 + # sequences. A combination with --maxiterate 1000 is recommended (G- + # INS-i). Default: off (6mer distance is used) + _Switch( + ["--globalpair", "globalpair"], + "All pairwise alignments are computed with the " + "Needleman-Wunsch algorithm. Default: off", + ), + # All pairwise alignments are computed with the Smith-Waterman + # algorithm. More accurate but slower than --6merpair. Suitable for a + # set of locally alignable sequences. Applicable to up to ~200 + # sequences. A combination with --maxiterate 1000 is recommended (L- + # INS-i). Default: off (6mer distance is used) + _Switch( + ["--localpair", "localpair"], + "All pairwise alignments are computed with the " + "Smith-Waterman algorithm. Default: off", + ), + # All pairwise alignments are computed with a local algorithm with + # the generalized affine gap cost (Altschul 1998). More accurate but + # slower than --6merpair. Suitable when large internal gaps are + # expected. Applicable to up to ~200 sequences. A combination with -- + # maxiterate 1000 is recommended (E-INS-i). Default: off (6mer + # distance is used) + _Switch( + ["--genafpair", "genafpair"], + "All pairwise alignments are computed with a local " + "algorithm with the generalized affine gap cost " + "(Altschul 1998). 
Default: off", + ), + # All pairwise alignments are computed with FASTA (Pearson and Lipman + # 1988). FASTA is required. Default: off (6mer distance is used) + _Switch( + ["--fastapair", "fastapair"], + "All pairwise alignments are computed with FASTA " + "(Pearson and Lipman 1988). Default: off", + ), + # Weighting factor for the consistency term calculated from pairwise + # alignments. Valid when either of --blobalpair, --localpair, -- + # genafpair, --fastapair or --blastpair is selected. Default: 2.7 + _Option( + ["--weighti", "weighti"], + "Weighting factor for the consistency term calculated " + "from pairwise alignments. Default: 2.7", + checker_function=lambda x: isinstance(x, float), + equate=False, + ), + # Guide tree is built number times in the progressive stage. Valid + # with 6mer distance. Default: 2 + _Option( + ["--retree", "retree"], + "Guide tree is built number times in the progressive " + "stage. Valid with 6mer distance. Default: 2", + checker_function=lambda x: isinstance(x, int), + equate=False, + ), + # Number cycles of iterative refinement are performed. Default: 0 + _Option( + ["--maxiterate", "maxiterate"], + "Number cycles of iterative refinement are performed. Default: 0", + checker_function=lambda x: isinstance(x, int), + equate=False, + ), + # Number of threads to use. Default: 1 + _Option( + ["--thread", "thread"], + "Number of threads to use. Default: 1", + checker_function=lambda x: isinstance(x, int), + equate=False, + ), + # Use FFT approximation in group-to-group alignment. Default: on + _Switch( + ["--fft", "fft"], + "Use FFT approximation in group-to-group alignment. Default: on", + ), + # Do not use FFT approximation in group-to-group alignment. Default: + # off + _Switch( + ["--nofft", "nofft"], + "Do not use FFT approximation in group-to-group " + "alignment. Default: off", + ), + # Alignment score is not checked in the iterative refinement stage. + # Default: off (score is checked) + _Switch( + ["--noscore", "noscore"], + "Alignment score is not checked in the iterative " + "refinement stage. Default: off (score is checked)", + ), + # Use the Myers-Miller (1988) algorithm. Default: automatically + # turned on when the alignment length exceeds 10,000 (aa/nt). + _Switch( + ["--memsave", "memsave"], + "Use the Myers-Miller (1988) algorithm. Default: " + "automatically turned on when the alignment length " + "exceeds 10,000 (aa/nt).", + ), + # Use a fast tree-building method (PartTree, Katoh and Toh 2007) with + # the 6mer distance. Recommended for a large number (> ~10,000) of + # sequences are input. Default: off + _Switch( + ["--parttree", "parttree"], + "Use a fast tree-building method with the 6mer " + "distance. Default: off", + ), + # The PartTree algorithm is used with distances based on DP. Slightly + # more accurate and slower than --parttree. Recommended for a large + # number (> ~10,000) of sequences are input. Default: off + _Switch( + ["--dpparttree", "dpparttree"], + "The PartTree algorithm is used with distances " + "based on DP. Default: off", + ), + # The PartTree algorithm is used with distances based on FASTA. + # Slightly more accurate and slower than --parttree. Recommended for + # a large number (> ~10,000) of sequences are input. FASTA is + # required. Default: off + _Switch( + ["--fastaparttree", "fastaparttree"], + "The PartTree algorithm is used with distances based " + "on FASTA. Default: off", + ), + # The number of partitions in the PartTree algorithm. 
Default: 50 + _Option( + ["--partsize", "partsize"], + "The number of partitions in the PartTree algorithm. Default: 50", + checker_function=lambda x: isinstance(x, int), + equate=False, + ), + # Do not make alignment larger than number sequences. Valid only with + # the --*parttree options. Default: the number of input sequences + _Switch( + ["--groupsize", "groupsize"], + "Do not make alignment larger than number sequences. " + "Default: the number of input sequences", + ), + # Adjust direction according to the first sequence + # Mafft V6 beta function + _Switch( + ["--adjustdirection", "adjustdirection"], + "Adjust direction according to the first sequence. Default off.", + ), + # Adjust direction according to the first sequence + # for highly diverged data; very slow + # Mafft V6 beta function + _Switch( + ["--adjustdirectionaccurately", "adjustdirectionaccurately"], + "Adjust direction according to the first sequence," + "for highly diverged data; very slow" + "Default off.", + ), + # **** Parameter **** + # Gap opening penalty at group-to-group alignment. Default: 1.53 + _Option( + ["--op", "op"], + "Gap opening penalty at group-to-group alignment. Default: 1.53", + checker_function=lambda x: isinstance(x, float), + equate=False, + ), + # Offset value, which works like gap extension penalty, for group-to- + # group alignment. Deafult: 0.123 + _Option( + ["--ep", "ep"], + "Offset value, which works like gap extension penalty, " + "for group-to- group alignment. Default: 0.123", + checker_function=lambda x: isinstance(x, float), + equate=False, + ), + # Gap opening penalty at local pairwise alignment. Valid when the -- + # localpair or --genafpair option is selected. Default: -2.00 + _Option( + ["--lop", "lop"], + "Gap opening penalty at local pairwise alignment. Default: 0.123", + checker_function=lambda x: isinstance(x, float), + equate=False, + ), + # Offset value at local pairwise alignment. Valid when the -- + # localpair or --genafpair option is selected. Default: 0.1 + _Option( + ["--lep", "lep"], + "Offset value at local pairwise alignment. Default: 0.1", + checker_function=lambda x: isinstance(x, float), + equate=False, + ), + # Gap extension penalty at local pairwise alignment. Valid when the - + # -localpair or --genafpair option is selected. Default: -0.1 + _Option( + ["--lexp", "lexp"], + "Gap extension penalty at local pairwise alignment. Default: -0.1", + checker_function=lambda x: isinstance(x, float), + equate=False, + ), + # Gap opening penalty to skip the alignment. Valid when the -- + # genafpair option is selected. Default: -6.00 + _Option( + ["--LOP", "LOP"], + "Gap opening penalty to skip the alignment. Default: -6.00", + checker_function=lambda x: isinstance(x, float), + equate=False, + ), + # Gap extension penalty to skip the alignment. Valid when the -- + # genafpair option is selected. Default: 0.00 + _Option( + ["--LEXP", "LEXP"], + "Gap extension penalty to skip the alignment. Default: 0.00", + checker_function=lambda x: isinstance(x, float), + equate=False, + ), + # BLOSUM number matrix (Henikoff and Henikoff 1992) is used. + # number=30, 45, 62 or 80. Default: 62 + _Option( + ["--bl", "bl"], + "BLOSUM number matrix is used. Default: 62", + checker_function=lambda x: x in BLOSUM_MATRICES, + equate=False, + ), + # JTT PAM number (Jones et al. 1992) matrix is used. number>0. + # Default: BLOSUM62 + _Option( + ["--jtt", "jtt"], + "JTT PAM number (Jones et al. 1992) matrix is used. " + "number>0. 
Default: BLOSUM62", + equate=False, + ), + # Transmembrane PAM number (Jones et al. 1994) matrix is used. + # number>0. Default: BLOSUM62 + _Option( + ["--tm", "tm"], + "Transmembrane PAM number (Jones et al. 1994) " + "matrix is used. number>0. Default: BLOSUM62", + filename=True, # to ensure spaced inputs are quoted + equate=False, + ), + # Use a user-defined AA scoring matrix. The format of matrixfile is + # the same to that of BLAST. Ignored when nucleotide sequences are + # input. Default: BLOSUM62 + _Option( + ["--aamatrix", "aamatrix"], + "Use a user-defined AA scoring matrix. Default: BLOSUM62", + filename=True, # to ensure spaced inputs are quoted + equate=False, + ), + # Incorporate the AA/nuc composition information into the scoring + # matrix. Default: off + _Switch( + ["--fmodel", "fmodel"], + "Incorporate the AA/nuc composition information into " + "the scoring matrix (True) or not (False, default)", + ), + # **** Output **** + # Name length for CLUSTAL and PHYLIP format output + _Option( + ["--namelength", "namelength"], + """Name length in CLUSTAL and PHYLIP output. + + MAFFT v6.847 (2011) added --namelength for use with + the --clustalout option for CLUSTAL output. + + MAFFT v7.024 (2013) added support for this with the + --phylipout option for PHYLIP output (default 10). + """, + checker_function=lambda x: isinstance(x, int), + equate=False, + ), + # Output format: clustal format. Default: off (fasta format) + _Switch( + ["--clustalout", "clustalout"], + "Output format: clustal (True) or fasta (False, default)", + ), + # Output format: phylip format. + # Added in beta with v6.847, fixed in v6.850 (2011) + _Switch( + ["--phylipout", "phylipout"], + "Output format: phylip (True), or fasta (False, default)", + ), + # Output order: same as input. Default: on + _Switch( + ["--inputorder", "inputorder"], + "Output order: same as input (True, default) or alignment " + "based (False)", + ), + # Output order: aligned. Default: off (inputorder) + _Switch( + ["--reorder", "reorder"], + "Output order: aligned (True) or in input order (False, default)", + ), + # Guide tree is output to the input.tree file. Default: off + _Switch( + ["--treeout", "treeout"], + "Guide tree is output to the input.tree file (True) or " + "not (False, default)", + ), + # Do not report progress. Default: off + _Switch( + ["--quiet", "quiet"], + "Do not report progress (True) or not (False, default).", + ), + # **** Input **** + # Assume the sequences are nucleotide. Deafult: auto + _Switch( + ["--nuc", "nuc"], + "Assume the sequences are nucleotide (True/False). Default: auto", + ), + # Assume the sequences are amino acid. Deafult: auto + _Switch( + ["--amino", "amino"], + "Assume the sequences are amino acid (True/False). Default: auto", + ), + # MAFFT has multiple --seed commands where the unaligned input is + # aligned to the seed alignment. There can be multiple seeds in the + # form: "mafft --seed align1 --seed align2 [etc] input" + # Effectively for n number of seed alignments. + # TODO - Can we use class _ArgumentList here? 
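+            # Note: --seed is defined as a single-valued _Option below, so
+            # this wrapper can pass at most one seed alignment per call.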
+ _Option( + ["--seed", "seed"], + "Seed alignments given in alignment_n (fasta format) " + "are aligned with sequences in input.", + filename=True, + equate=False, + ), + # The input (must be FASTA format) + _Argument(["input"], "Input file name", filename=True, is_required=True), + # mafft-profile takes a second alignment input as an argument: + # mafft-profile align1 align2 + _Argument( + ["input1"], + "Second input file name for the mafft-profile command", + filename=True, + ), + ] + AbstractCommandline.__init__(self, cmd, **kwargs) + + +if __name__ == "__main__": + from Bio._utils import run_doctest + + run_doctest() diff --git a/code/lib/Bio/Align/Applications/_Muscle.py b/code/lib/Bio/Align/Applications/_Muscle.py new file mode 100644 index 0000000..6a67e2a --- /dev/null +++ b/code/lib/Bio/Align/Applications/_Muscle.py @@ -0,0 +1,685 @@ +# Copyright 2009 by Cymon J. Cox. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +"""Command line wrapper for the multiple alignment program MUSCLE.""" + + +from Bio.Application import _Option, _Switch, AbstractCommandline + + +class MuscleCommandline(AbstractCommandline): + r"""Command line wrapper for the multiple alignment program MUSCLE. + + http://www.drive5.com/muscle/ + + Notes + ----- + Last checked against version: 3.7, briefly against 3.8 + + References + ---------- + Edgar, Robert C. (2004), MUSCLE: multiple sequence alignment with high + accuracy and high throughput, Nucleic Acids Research 32(5), 1792-97. + + Edgar, R.C. (2004) MUSCLE: a multiple sequence alignment method with + reduced time and space complexity. BMC Bioinformatics 5(1): 113. + + Examples + -------- + >>> from Bio.Align.Applications import MuscleCommandline + >>> muscle_exe = r"C:\Program Files\Alignments\muscle3.8.31_i86win32.exe" + >>> in_file = r"C:\My Documents\unaligned.fasta" + >>> out_file = r"C:\My Documents\aligned.fasta" + >>> muscle_cline = MuscleCommandline(muscle_exe, input=in_file, out=out_file) + >>> print(muscle_cline) + "C:\Program Files\Alignments\muscle3.8.31_i86win32.exe" -in "C:\My Documents\unaligned.fasta" -out "C:\My Documents\aligned.fasta" + + You would typically run the command line with muscle_cline() or via + the Python subprocess module, as described in the Biopython tutorial. + + """ + + def __init__(self, cmd="muscle", **kwargs): + """Initialize the class.""" + CLUSTERING_ALGORITHMS = ["upgma", "upgmb", "neighborjoining"] + DISTANCE_MEASURES_ITER1 = [ + "kmer6_6", + "kmer20_3", + "kmer20_4", + "kbit20_3", + "kmer4_6", + ] + DISTANCE_MEASURES_ITER2 = DISTANCE_MEASURES_ITER1 + [ + "pctid_kimura", + "pctid_log", + ] + OBJECTIVE_SCORES = ["sp", "ps", "dp", "xp", "spf", "spm"] + TREE_ROOT_METHODS = ["pseudo", "midlongestspan", "minavgleafdist"] + + # The mucleotide arguments for the sequence type parameter in MUSCLE (-seqtype) + # were updated at somepoint in MUSCLE version 3.8. Prior to the update + # 'nucleo' was used for nucleotide. This has been updated to 'rna' and 'dna'. 'nucleo' kept for + # backwards compatibility with older MUSCLE versions. 
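+        # For example, seqtype="dna" with MUSCLE 3.8 or later, or
+        # seqtype="nucleo" with older releases.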
+ SEQUENCE_TYPES = ["protein", "rna", "dna", "nucleo", "auto"] + WEIGHTING_SCHEMES = [ + "none", + "clustalw", + "henikoff", + "henikoffpb", + "gsc", + "threeway", + ] + self.parameters = [ + # Can't use "in" as the final alias as this + # is a reserved word in python: + _Option( + ["-in", "in", "input"], "Input filename", filename=True, equate=False + ), + _Option(["-out", "out"], "Output filename", filename=True, equate=False), + _Switch( + ["-diags", "diags"], "Find diagonals (faster for similar sequences)" + ), + _Switch(["-profile", "profile"], "Perform a profile alignment"), + _Option( + ["-in1", "in1"], + "First input filename for profile alignment", + filename=True, + equate=False, + ), + _Option( + ["-in2", "in2"], + "Second input filename for a profile alignment", + filename=True, + equate=False, + ), + # anchorspacing Integer 32 Minimum spacing + # between anchor cols + _Option( + ["-anchorspacing", "anchorspacing"], + "Minimum spacing between anchor columns", + checker_function=lambda x: isinstance(x, int), + equate=False, + ), + # center Floating point [1] Center parameter. + # Should be negative. + _Option( + ["-center", "center"], + "Center parameter - should be negative", + checker_function=lambda x: isinstance(x, float), + equate=False, + ), + # cluster1 upgma upgmb Clustering method. + _Option( + ["-cluster1", "cluster1"], + "Clustering method used in iteration 1", + checker_function=lambda x: x in CLUSTERING_ALGORITHMS, + equate=False, + ), + # cluster2 upgmb cluster1 is used + # neighborjoining in iteration 1 and + # 2, cluster2 in + # later iterations. + _Option( + ["-cluster2", "cluster2"], + "Clustering method used in iteration 2", + checker_function=lambda x: x in CLUSTERING_ALGORITHMS, + equate=False, + ), + # diaglength Integer 24 Minimum length of + # diagonal. + _Option( + ["-diaglength", "diaglength"], + "Minimum length of diagonal", + checker_function=lambda x: isinstance(x, int), + equate=True, + ), + # diagmargin Integer 5 Discard this many + # positions at ends + # of diagonal. + _Option( + ["-diagmargin", "diagmargin"], + "Discard this many positions at ends of diagonal", + checker_function=lambda x: isinstance(x, int), + equate=False, + ), + # distance1 kmer6_6 Kmer6_6(amino) or Distance measure + # kmer20_3 Kmer4_6(nucleo) for iteration 1 + # kmer20_4 + # kbit20_3 + # kmer4_6 + _Option( + ["-distance1", "distance1"], + "Distance measure for iteration 1", + checker_function=lambda x: x in DISTANCE_MEASURES_ITER1, + equate=False, + ), + # distance2 kmer6_6 pctid_kimura Distance measure + # kmer20_3 for iterations + # kmer20_4 2, 3 ... + # kbit20_3 + # pctid_kimura + # pctid_log + _Option( + ["-distance2", "distance2"], + "Distance measure for iteration 2", + checker_function=lambda x: x in DISTANCE_MEASURES_ITER2, + equate=False, + ), + # gapextend Floating point [1] The gap extend score + _Option( + ["-gapextend", "gapextend"], + "Gap extension penalty", + checker_function=lambda x: isinstance(x, float), + equate=False, + ), + # gapopen Floating point [1] The gap open score + # Must be negative. + _Option( + ["-gapopen", "gapopen"], + "Gap open score - negative number", + checker_function=lambda x: isinstance(x, float), + equate=False, + ), + # hydro Integer 5 Window size for + # determining whether + # a region is + # hydrophobic. 
+ _Option( + ["-hydro", "hydro"], + "Window size for hydrophobic region", + checker_function=lambda x: isinstance(x, int), + equate=False, + ), + # hydrofactor Floating point 1.2 Multiplier for gap + # open/close + # penalties in + # hydrophobic regions + _Option( + ["-hydrofactor", "hydrofactor"], + "Multiplier for gap penalties in hydrophobic regions", + checker_function=lambda x: isinstance(x, float), + equate=False, + ), + # log File name None. Log file name + # (delete existing + # file). + _Option(["-log", "log"], "Log file name", filename=True, equate=False), + # loga File name None. Log file name + # (append to existing + # file). + _Option( + ["-loga", "loga"], + "Log file name (append to existing file)", + filename=True, + equate=False, + ), + # matrix File name None. File name for + # substitution matrix + # in NCBI or WU-BLAST + # format. If you + # specify your own + # matrix, you should + # also specify: + # -gapopen + # -gapextend + # -center 0.0 + _Option( + ["-matrix", "matrix"], + "path to NCBI or WU-BLAST format protein substitution " + "matrix - also set -gapopen, -gapextend and -center", + filename=True, + equate=False, + ), + # diagbreak Integer 1 Maximum distance + # between two + # diagonals that + # allows them to + # merge into one + # diagonal. + _Option( + ["-diagbreak", "diagbreak"], + "Maximum distance between two diagonals that allows " + "them to merge into one diagonal", + checker_function=lambda x: isinstance(x, int), + equate=False, + ), + _Option( + ["-maxdiagbreak", "maxdiagbreak"], # deprecated 3.8 + "Deprecated in v3.8, use -diagbreak instead.", + checker_function=lambda x: isinstance(x, int), + equate=False, + ), + # maxhours Floating point None. Maximum time to + # run in hours. The + # actual time may + # exceed requested + # limit by a few + # minutes. Decimals + # are allowed, so 1.5 + # means one hour and + # 30 minutes. + _Option( + ["-maxhours", "maxhours"], + "Maximum time to run in hours", + checker_function=lambda x: isinstance(x, float), + equate=False, + ), + # maxiters Integer 1, 2 ... 16 Maximum number of + # iterations. + _Option( + ["-maxiters", "maxiters"], + "Maximum number of iterations", + checker_function=lambda x: isinstance(x, int), + equate=False, + ), + # maxtrees Integer 1 Maximum number of + # new trees to build + # in iteration 2. + _Option( + ["-maxtrees", "maxtrees"], + "Maximum number of trees to build in iteration 2", + checker_function=lambda x: isinstance(x, int), + equate=False, + ), + # minbestcolscore Floating point [1] Minimum score a + # column must have to + # be an anchor. + _Option( + ["-minbestcolscore", "minbestcolscore"], + "Minimum score a column must have to be an anchor", + checker_function=lambda x: isinstance(x, float), + equate=False, + ), + # minsmoothscore Floating point [1] Minimum smoothed + # score a column must + # have to be an + # anchor. + _Option( + ["-minsmoothscore", "minsmoothscore"], + "Minimum smoothed score a column must have to be an anchor", + checker_function=lambda x: isinstance(x, float), + equate=False, + ), + # objscore sp spm Objective score + # ps used by tree + # dp dependent + # xp refinement. + # spf sp=sum-of-pairs + # spm score. (dimer + # approximation) + # spm=sp for < 100 + # seqs, otherwise spf + # dp=dynamic + # programming score. + # ps=average profile- + # sequence score. + # xp=cross profile + # score. 
+ _Option( + ["-objscore", "objscore"], + "Objective score used by tree dependent refinement", + checker_function=lambda x: x in OBJECTIVE_SCORES, + equate=False, + ), + # refinewindow Integer 200 Length of window + # for -refinew. + _Option( + ["-refinewindow", "refinewindow"], + "Length of window for -refinew", + checker_function=lambda x: isinstance(x, int), + equate=False, + ), + # root1 pseudo pseudo Method used to root + _Option( + ["-root1", "root1"], + "Method used to root tree in iteration 1", + checker_function=lambda x: x in TREE_ROOT_METHODS, + equate=False, + ), + # root2 midlongestspan tree; root1 is + # minavgleafdist used in iteration 1 + # and 2, root2 in + # later iterations. + _Option( + ["-root2", "root2"], + "Method used to root tree in iteration 2", + checker_function=lambda x: x in TREE_ROOT_METHODS, + equate=False, + ), + # scorefile File name None File name where to + # write a score file. + # This contains one + # line for each column + # in the alignment. + # The line contains + # the letters in the + # column followed by + # the average BLOSUM62 + # score over pairs of + # letters in the + # column. + _Option( + ["-scorefile", "scorefile"], + "Score file name, contains one line for each column" + " in the alignment with average BLOSUM62 score", + filename=True, + equate=False, + ), + # seqtype protein auto Sequence type. + # dna (MUSCLE version > 3.8) + # rna (MUSCLE version > 3.8) + # auto + # nucleo (only valid for MUSCLE versions < 3.8) + _Option( + ["-seqtype", "seqtype"], + "Sequence type", + checker_function=lambda x: x in SEQUENCE_TYPES, + equate=False, + ), + # smoothscoreceil Floating point [1] Maximum value of + # column score for + # smoothing purposes. + _Option( + ["-smoothscoreceil", "smoothscoreceil"], + "Maximum value of column score for smoothing", + checker_function=lambda x: isinstance(x, float), + equate=False, + ), + # smoothwindow Integer 7 Window used for + # anchor column + # smoothing. + _Option( + ["-smoothwindow", "smoothwindow"], + "Window used for anchor column smoothing", + checker_function=lambda x: isinstance(x, int), + equate=False, + ), + # spscore File name Compute SP + # objective score of + # multiple alignment. + _Option( + ["-spscore", "spscore"], + "Compute SP objective score of multiple alignment", + filename=True, + equate=False, + ), + # SUEFF Floating point value 0.1 Constant used in + # between 0 and 1. UPGMB clustering. + # Determines the + # relative fraction + # of average linkage + # (SUEFF) vs. nearest + # neighbor linkage + # (1 SUEFF). + _Option( + ["-sueff", "sueff"], + "Constant used in UPGMB clustering", + checker_function=lambda x: isinstance(x, float), + equate=False, + ), + # tree1 File name None Save tree + _Option( + ["-tree1", "tree1"], "Save Newick tree from iteration 1", equate=False + ), + # tree2 first or second + # iteration to given + # file in Newick + # (Phylip-compatible) + # format. + _Option( + ["-tree2", "tree2"], "Save Newick tree from iteration 2", equate=False + ), + # usetree File name None Use given tree as + # guide tree. Must by + # in Newick + # (Phyip-compatible) + # format. + _Option( + ["-usetree", "usetree"], + "Use given Newick tree as guide tree", + filename=True, + equate=False, + ), + # weight1 none clustalw Sequence weighting + _Option( + ["-weight1", "weight1"], + "Weighting scheme used in iteration 1", + checker_function=lambda x: x in WEIGHTING_SCHEMES, + equate=False, + ), + # weight2 henikoff scheme. + # henikoffpb weight1 is used in + # gsc iterations 1 and 2. 
+ # clustalw weight2 is used for + # threeway tree-dependent + # refinement. + # none=all sequences + # have equal weight. + # henikoff=Henikoff & + # Henikoff weighting + # scheme. + # henikoffpb=Modified + # Henikoff scheme as + # used in PSI-BLAST. + # clustalw=CLUSTALW + # method. + # threeway=Gotoh + # three-way method. + _Option( + ["-weight2", "weight2"], + "Weighting scheme used in iteration 2", + checker_function=lambda x: x in WEIGHTING_SCHEMES, + equate=False, + ), + # ################### FORMATS #################################### + # Multiple formats can be specified on the command line + # If -msf appears it will be used regardless of other formats + # specified. If -clw appears (and not -msf), clustalw format will + # be used regardless of other formats specified. If both -clw and + # -clwstrict are specified -clwstrict will be used regardless of + # other formats specified. If -fasta is specified and not -msf, + # -clw, or clwstrict, fasta will be used. If -fasta and -html are + # specified -fasta will be used. Only if -html is specified alone + # will html be used. I kid ye not. + # clw no Write output in CLUSTALW format + # (default is FASTA). + _Switch( + ["-clw", "clw"], + "Write output in CLUSTALW format (with a MUSCLE header)", + ), + # clwstrict no Write output in CLUSTALW format with + # the "CLUSTAL W (1.81)" header rather + # than the MUSCLE version. This is + # useful when a post-processing step is + # picky about the file header. + _Switch( + ["-clwstrict", "clwstrict"], + "Write output in CLUSTALW format with version 1.81 header", + ), + # fasta yes Write output in FASTA format. + # Alternatives include clw, + # clwstrict, msf and html. + _Switch(["-fasta", "fasta"], "Write output in FASTA format"), + # html no Write output in HTML format (default + # is FASTA). + _Switch(["-html", "html"], "Write output in HTML format"), + # msf no Write output in MSF format (default + # is FASTA). + _Switch(["-msf", "msf"], "Write output in MSF format"), + # Phylip interleaved - undocumented as of 3.7 + _Switch(["-phyi", "phyi"], "Write output in PHYLIP interleaved format"), + # Phylip sequential - undocumented as of 3.7 + _Switch(["-phys", "phys"], "Write output in PHYLIP sequential format"), + # ################# Additional specified output files ######### + _Option( + ["-phyiout", "phyiout"], + "Write PHYLIP interleaved output to specified filename", + filename=True, + equate=False, + ), + _Option( + ["-physout", "physout"], + "Write PHYLIP sequential format to specified filename", + filename=True, + equate=False, + ), + _Option( + ["-htmlout", "htmlout"], + "Write HTML output to specified filename", + filename=True, + equate=False, + ), + _Option( + ["-clwout", "clwout"], + "Write CLUSTALW output (with MUSCLE header) to specified filename", + filename=True, + equate=False, + ), + _Option( + ["-clwstrictout", "clwstrictout"], + "Write CLUSTALW output (with version 1.81 header) to " + "specified filename", + filename=True, + equate=False, + ), + _Option( + ["-msfout", "msfout"], + "Write MSF format output to specified filename", + filename=True, + equate=False, + ), + _Option( + ["-fastaout", "fastaout"], + "Write FASTA format output to specified filename", + filename=True, + equate=False, + ), + # ############# END FORMATS ################################### + # anchors yes Use anchor optimization in tree + # dependent refinement iterations. 
+        _Switch(
+            ["-anchors", "anchors"],
+            "Use anchor optimisation in tree dependent refinement iterations",
+        ),
+        # noanchors        no     Disable anchor optimization. Default
+        #                         is anchors.
+        _Switch(
+            ["-noanchors", "noanchors"],
+            "Do not use anchor optimisation in tree dependent "
+            "refinement iterations",
+        ),
+        # brenner          no     Use Steven Brenner's method for
+        #                         computing the root alignment.
+        _Switch(
+            ["-brenner", "brenner"], "Use Steve Brenner's root alignment method"
+        ),
+        # cluster          no     Perform fast clustering of input
+        #                         sequences. Use the tree1 option to
+        #                         save the tree.
+        _Switch(
+            ["-cluster", "cluster"],
+            "Perform fast clustering of input sequences, "
+            "use -tree1 to save tree",
+        ),
+        # dimer            no     Use dimer approximation for the
+        #                         SP score (faster, less accurate).
+        _Switch(
+            ["-dimer", "dimer"],
+            "Use faster (slightly less accurate) dimer approximation "
+            "for the SP score",
+        ),
+        # group            yes    Group similar sequences together
+        #                         in the output. This is the default.
+        #                         See also stable.
+        _Switch(["-group", "group"], "Group similar sequences in output"),
+        # ############# log-expectation profile score ####################
+        # One of either -le, -sp, or -sv
+        #
+        # According to the doc, spn is default and the only option for
+        # nucleotides: this doesn't appear to be true. -le, -sp, and -sv
+        # can be used and produce numerically different logs
+        # (what is going on?)
+        #
+        # spn fails on proteins
+        # le               maybe  Use log-expectation profile score
+        #                         (VTML240). Alternatives are to use sp
+        #                         or sv. This is the default for amino
+        #                         acid sequences.
+        _Switch(["-le", "le"], "Use log-expectation profile score (VTML240)"),
+        # sv               no     Use sum-of-pairs profile score
+        #                         (VTML240). Default is le.
+        _Switch(["-sv", "sv"], "Use sum-of-pairs profile score (VTML240)"),
+        # sp               no     Use sum-of-pairs protein profile
+        #                         score (PAM200). Default is le.
+        _Switch(["-sp", "sp"], "Use sum-of-pairs protein profile score (PAM200)"),
+        # spn              maybe  Use sum-of-pairs nucleotide profile
+        #                         score (BLASTZ parameters). This is
+        #                         the only option for nucleotides,
+        #                         and is therefore the default.
+        _Switch(
+            ["-spn", "spn"], "Use sum-of-pairs nucleotide profile score"
+        ),
+        # ########## END log-expectation profile score ###################
+        # quiet            no     Do not display progress messages.
+        _Switch(["-quiet", "quiet"], "Do not display progress messages"),
+        # refine           no     Input file is already aligned, skip
+        #                         first two iterations and begin tree
+        #                         dependent refinement.
+        _Switch(["-refine", "refine"], "Only do tree dependent refinement"),
+        # refinew          no     Refine an alignment by dividing it
+        #                         into non-overlapping windows and
+        #                         re-aligning each window. Typically
+        #                         used for whole-genome nucleotide
+        #                         alignments.
+        _Switch(
+            ["-refinew", "refinew"],
+            "Only do tree dependent refinement using sliding window approach",
+        ),
+        # core             yes in muscle,  Do not catch exceptions.
+        #                  no in muscled.
+        _Switch(["-core", "core"], "Do not catch exceptions"),
+        # nocore           no in muscle,   Catch exceptions and give an
+        #                  yes in muscled. error message if possible.
+        _Switch(["-nocore", "nocore"], "Catch exceptions"),
+        # stable           no     Preserve input order of sequences
+        #                         in output file. Default is to group
+        #                         sequences by similarity (group).
+        _Switch(
+            ["-stable", "stable"],
+            "Do not group similar sequences in output (not supported in v3.8)",
+        ),
+        # termgaps4        yes    Use 4-way test for treatment of
+        #                         terminal gaps.
+        #                         (Cannot be disabled in this version).
+ # + # termgapsfull no Terminal gaps penalized with + # full penalty. [1] Not fully + # supported in this version + # + # termgapshalf yes Terminal gaps penalized with + # half penalty. [1] Not fully + # supported in this version + # + # termgapshalflonger no Terminal gaps penalized with + # half penalty if gap relative + # to longer sequence, otherwise with + # full penalty. [1] Not fully + # supported in this version + # + # verbose no Write parameter settings and + # progress messages to log file. + _Switch(["-verbose", "verbose"], "Write parameter settings and progress"), + # version no Write version string to + # stdout and exit + _Switch(["-version", "version"], "Write version string to stdout and exit"), + ] + AbstractCommandline.__init__(self, cmd, **kwargs) + + +if __name__ == "__main__": + from Bio._utils import run_doctest + + run_doctest() diff --git a/code/lib/Bio/Align/Applications/_Prank.py b/code/lib/Bio/Align/Applications/_Prank.py new file mode 100644 index 0000000..4d07c56 --- /dev/null +++ b/code/lib/Bio/Align/Applications/_Prank.py @@ -0,0 +1,236 @@ +# Copyright 2009 by Cymon J. Cox. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +"""Command line wrapper for the multiple alignment program PRANK.""" + +from Bio.Application import _Option, _Switch, AbstractCommandline + + +class PrankCommandline(AbstractCommandline): + """Command line wrapper for the multiple alignment program PRANK. + + http://www.ebi.ac.uk/goldman-srv/prank/prank/ + + Notes + ----- + Last checked against version: 081202 + + References + ---------- + Loytynoja, A. and Goldman, N. 2005. An algorithm for progressive + multiple alignment of sequences with insertions. Proceedings of + the National Academy of Sciences, 102: 10557--10562. + + Loytynoja, A. and Goldman, N. 2008. Phylogeny-aware gap placement + prevents errors in sequence alignment and evolutionary analysis. + Science, 320: 1632. + + Examples + -------- + To align a FASTA file (unaligned.fasta) with the output in aligned + FASTA format with the output filename starting with "aligned" (you + can't pick the filename explicitly), no tree output and no XML output, + use: + + >>> from Bio.Align.Applications import PrankCommandline + >>> prank_cline = PrankCommandline(d="unaligned.fasta", + ... o="aligned", # prefix only! + ... f=8, # FASTA output + ... notree=True, noxml=True) + >>> print(prank_cline) + prank -d=unaligned.fasta -o=aligned -f=8 -noxml -notree + + You would typically run the command line with prank_cline() or via + the Python subprocess module, as described in the Biopython tutorial. + + """ + + def __init__(self, cmd="prank", **kwargs): + """Initialize the class.""" + OUTPUT_FORMAT_VALUES = list(range(1, 18)) + self.parameters = [ + # ################# input/output parameters: ################## + # -d=sequence_file + _Option(["-d", "d"], "Input filename", filename=True, is_required=True), + # -t=tree_file [default: no tree, generate approximate NJ tree] + _Option(["-t", "t"], "Input guide tree filename", filename=True), + # -tree="tree_string" [tree in newick format; in double quotes] + _Option(["-tree", "tree"], "Input guide tree as Newick string"), + # -m=model_file [default: HKY2/WAG] + _Option( + ["-m", "m"], "User-defined alignment model filename. 
Default: HKY2/WAG" + ), + # -o=output_file [default: 'output'] + _Option( + ["-o", "o"], + "Output filenames prefix. Default: 'output'\n " + "Will write: output.?.fas (depending on requested " + "format), output.?.xml and output.?.dnd", + filename=True, + ), + # -f=output_format [default: 8] + _Option( + ["-f", "f"], + "Output alignment format. Default: 8 FASTA\n" + "Option are:\n" + "1. IG/Stanford 8. Pearson/Fasta\n" + "2. GenBank/GB 11. Phylip3.2\n" + "3. NBRF 12. Phylip\n" + "4. EMBL 14. PIR/CODATA\n" + "6. DNAStrider 15. MSF\n" + "7. Fitch 17. PAUP/NEXUS", + checker_function=lambda x: x in OUTPUT_FORMAT_VALUES, + ), + _Switch( + ["-noxml", "noxml"], + "Do not output XML files (PRANK versions earlier than v.120626)", + ), + _Switch( + ["-notree", "notree"], + "Do not output dnd tree files (PRANK versions earlier than v.120626)", + ), + _Switch( + ["-showxml", "showxml"], "Output XML files (PRANK v.120626 and later)" + ), + _Switch( + ["-showtree", "showtree"], + "Output dnd tree files (PRANK v.120626 and later)", + ), + _Switch(["-shortnames", "shortnames"], "Truncate names at first space"), + _Switch(["-quiet", "quiet"], "Reduce verbosity"), + # ###################### model parameters: ###################### + # +F [force insertions to be always skipped] + # -F [equivalent] + _Switch( + ["-F", "+F", "F"], "Force insertions to be always skipped: same as +F" + ), + # -dots [show insertion gaps as dots] + _Switch(["-dots", "dots"], "Show insertion gaps as dots"), + # -gaprate=# [gap opening rate; default: dna 0.025 / prot 0.0025] + _Option( + ["-gaprate", "gaprate"], + "Gap opening rate. Default: dna 0.025 prot 0.0025", + checker_function=lambda x: isinstance(x, float), + ), + # -gapext=# [gap extension probability; default: dna 0.5 / prot 0.5] + _Option( + ["-gapext", "gapext"], + "Gap extension probability. Default: dna 0.5 / prot 0.5", + checker_function=lambda x: isinstance(x, float), + ), + # -dnafreqs=#,#,#,# [ACGT; default: empirical] + _Option( + ["-dnafreqs", "dnafreqs"], + "DNA frequencies - 'A,C,G,T'. eg '25,25,25,25' as a quote " + "surrounded string value. Default: empirical", + checker_function=lambda x: isinstance(x, bytes), + ), + # -kappa=# [ts/tv rate ratio; default:2] + _Option( + ["-kappa", "kappa"], + "Transition/transversion ratio. Default: 2", + checker_function=lambda x: isinstance(x, int), + ), + # -rho=# [pur/pyr rate ratio; default:1] + _Option( + ["-rho", "rho"], + "Purine/pyrimidine ratio. Default: 1", + checker_function=lambda x: isinstance(x, int), + ), + # -codon [for DNA: use empirical codon model] + _Switch(["-codon", "codon"], "Codon aware alignment or not"), + # -termgap [penalise terminal gaps normally] + _Switch(["-termgap", "termgap"], "Penalise terminal gaps normally"), + # ############### other parameters: ################################ + # -nopost [do not compute posterior support; default: compute] + _Switch( + ["-nopost", "nopost"], + "Do not compute posterior support. Default: compute", + ), + # -pwdist=# [expected pairwise distance for computing guidetree; + # default: dna 0.25 / prot 0.5] + _Option( + ["-pwdist", "pwdist"], + "Expected pairwise distance for computing guidetree. " + "Default: dna 0.25 / prot 0.5", + checker_function=lambda x: isinstance(x, float), + ), + _Switch( + ["-once", "once"], "Run only once. 
Default: twice if no guidetree given"
+        ),
+        _Switch(["-twice", "twice"], "Always run twice"),
+        _Switch(["-skipins", "skipins"], "Skip insertions in posterior support"),
+        _Switch(
+            ["-uselogs", "uselogs"],
+            "Slower but should work for a greater number of sequences",
+        ),
+        _Switch(["-writeanc", "writeanc"], "Output ancestral sequences"),
+        _Switch(
+            ["-printnodes", "printnodes"], "Output each node; mostly for debugging"
+        ),
+        # -matresize=# [matrix resizing multiplier]
+        # Doesn't specify type but Float and Int work
+        _Option(
+            ["-matresize", "matresize"],
+            "Matrix resizing multiplier",
+            checker_function=lambda x: (isinstance(x, float) or isinstance(x, int)),
+        ),
+        # -matinitsize=# [matrix initial size multiplier]
+        # Doesn't specify type but Float and Int work
+        _Option(
+            ["-matinitsize", "matinitsize"],
+            "Matrix initial size multiplier",
+            checker_function=lambda x: (isinstance(x, float) or isinstance(x, int)),
+        ),
+        _Switch(["-longseq", "longseq"], "Save space in pairwise alignments"),
+        _Switch(["-pwgenomic", "pwgenomic"], "Do pairwise alignment, no guidetree"),
+        # -pwgenomicdist=# [distance for pairwise alignment; default: 0.3]
+        _Option(
+            ["-pwgenomicdist", "pwgenomicdist"],
+            "Distance for pairwise alignment. Default: 0.3",
+            checker_function=lambda x: isinstance(x, float),
+        ),
+        # -scalebranches=# [scale branch lengths; default: dna 1 / prot 2]
+        _Option(
+            ["-scalebranches", "scalebranches"],
+            "Scale branch lengths. Default: dna 1 / prot 2",
+            checker_function=lambda x: isinstance(x, int),
+        ),
+        # -fixedbranches=# [use fixed branch lengths]
+        # Assume looking for a float
+        _Option(
+            ["-fixedbranches", "fixedbranches"],
+            "Use fixed branch lengths of input value",
+            checker_function=lambda x: isinstance(x, float),
+        ),
+        # -maxbranches=# [set maximum branch length]
+        # Assume looking for a float
+        _Option(
+            ["-maxbranches", "maxbranches"],
+            "Use maximum branch lengths of input value",
+            checker_function=lambda x: isinstance(x, float),
+        ),
+        # -realbranches [disable branch length truncation]
+        _Switch(
+            ["-realbranches", "realbranches"], "Disable branch length truncation"
+        ),
+        _Switch(["-translate", "translate"], "Translate to protein"),
+        _Switch(
+            ["-mttranslate", "mttranslate"], "Translate to protein using mt table"
+        ),
+        # ##################### other: ####################
+        _Switch(
+            ["-convert", "convert"],
+            "Convert input alignment to new format. Do not perform alignment",
+        ),
+    ]
+    AbstractCommandline.__init__(self, cmd, **kwargs)
+
+
+if __name__ == "__main__":
+    from Bio._utils import run_doctest
+
+    run_doctest()
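The wrapper above also exposes PRANK's `-convert` mode. As a quick sketch (not part of the capsule; the filenames and format code are illustrative, with 17 being PAUP/NEXUS in the `-f` table above), an existing alignment could be reformatted without re-aligning:

```python
from Bio.Align.Applications import PrankCommandline

# Reformat an existing FASTA alignment as PAUP/NEXUS (format code 17)
# using -convert, which skips the alignment step entirely.
cline = PrankCommandline(d="aligned.fasta", o="converted", f=17, convert=True)
print(cline)  # inspect the command line before running it with cline()
```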
diff --git a/code/lib/Bio/Align/Applications/_Probcons.py b/code/lib/Bio/Align/Applications/_Probcons.py
new file mode 100644
index 0000000..e94e026
--- /dev/null
+++ b/code/lib/Bio/Align/Applications/_Probcons.py
@@ -0,0 +1,137 @@
+# Copyright 2009 by Cymon J. Cox. All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Command line wrapper for the multiple alignment program PROBCONS."""
+
+from Bio.Application import _Option, _Switch, _Argument, AbstractCommandline
+
+
+class ProbconsCommandline(AbstractCommandline):
+    """Command line wrapper for the multiple alignment program PROBCONS.
+
+    http://probcons.stanford.edu/
+
+    Notes
+    -----
+    Last checked against version: 1.12
+
+    References
+    ----------
+    Do, C.B., Mahabhashyam, M.S.P., Brudno, M., and Batzoglou, S. 2005.
+    PROBCONS: Probabilistic Consistency-based Multiple Sequence Alignment.
+    Genome Research 15: 330-340.
+
+    Examples
+    --------
+    To align a FASTA file (unaligned.fasta) with the output in ClustalW
+    format, and otherwise default settings, use:
+
+    >>> from Bio.Align.Applications import ProbconsCommandline
+    >>> probcons_cline = ProbconsCommandline(input="unaligned.fasta",
+    ...                                      clustalw=True)
+    >>> print(probcons_cline)
+    probcons -clustalw unaligned.fasta
+
+    You would typically run the command line with probcons_cline() or via
+    the Python subprocess module, as described in the Biopython tutorial.
+
+    Note that PROBCONS will write the alignment to stdout, which you may
+    want to save to a file and then parse, e.g.::
+
+        stdout, stderr = probcons_cline()
+        with open("aligned.aln", "w") as handle:
+            handle.write(stdout)
+        from Bio import AlignIO
+        align = AlignIO.read("aligned.aln", "clustal")
+
+    Alternatively, to parse the output with AlignIO directly you can
+    use StringIO to turn the string into a handle::
+
+        stdout, stderr = probcons_cline()
+        from io import StringIO
+        from Bio import AlignIO
+        align = AlignIO.read(StringIO(stdout), "clustal")
+
+    """
+
+    def __init__(self, cmd="probcons", **kwargs):
+        """Initialize the class."""
+        self.parameters = [
+            # Note that some options cannot be assigned via properties using the
+            # original documented option (because hyphens are not valid for names in
+            # python), e.g cmdline.pre-training = 3 will not work
+            # In these cases the shortened option name should be used
+            # cmdline.pre = 3
+            _Switch(
+                ["-clustalw", "clustalw"], "Use CLUSTALW output format instead of MFA"
+            ),
+            _Option(
+                ["-c", "c", "--consistency", "consistency"],
+                "Use 0 <= REPS <= 5 (default: 2) passes of consistency transformation",
+                checker_function=lambda x: x in range(0, 6),
+                equate=False,
+            ),
+            _Option(
+                ["-ir", "--iterative-refinement", "iterative-refinement", "ir"],
+                "Use 0 <= REPS <= 1000 (default: 100) passes of iterative-refinement",
+                checker_function=lambda x: x in range(0, 1001),
+                equate=False,
+            ),
+            _Option(
+                ["-pre", "--pre-training", "pre-training", "pre"],
+                "Use 0 <= REPS <= 20 (default: 0) rounds of pretraining",
+                checker_function=lambda x: x in range(0, 21),
+                equate=False,
+            ),
+            _Switch(["-pairs", "pairs"], "Generate all-pairs pairwise alignments"),
+            _Switch(
+                ["-viterbi", "viterbi"],
+                "Use Viterbi algorithm to generate all pairs "
+                "(automatically enables -pairs)",
+            ),
+            _Switch(
+                ["-verbose", "verbose"], "Report progress while aligning (default: off)"
+            ),
+            _Option(
+                ["-annot", "annot"],
+                "Write annotation for multiple alignment to FILENAME",
+                equate=False,
+            ),
+            _Option(
+                ["-t", "t", "--train", "train"],
+                "Compute EM transition probabilities, store in FILENAME "
+                "(default: no training)",
+                equate=False,
+            ),
+            _Switch(
+                ["-e", "e", "--emissions", "emissions"],
+                "Also reestimate emission probabilities (default: off)",
+            ),
+            _Option(
+                ["-p", "p", "--paramfile", "paramfile"],
+                "Read parameters from FILENAME",
+                equate=False,
+            ),
+            _Switch(
+                ["-a", "--alignment-order", "alignment-order", "a"],
+                "Print sequences in alignment order rather than input "
+                "order (default: off)",
+            ),
+            # Input file name
+            _Argument(
+                ["input"],
+                "Input file name. Must be multiple FASTA alignment (MFA) format",
+                filename=True,
+                is_required=True,
+            ),
+        ]
+        AbstractCommandline.__init__(self, cmd, **kwargs)
+
+
+if __name__ == "__main__":
+    from Bio._utils import run_doctest
+
+    run_doctest()
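Since the package docstring (see `__init__.py` below) recommends building commands and invoking them via the `subprocess` module directly, here is a minimal sketch of the equivalent direct call; the file names are hypothetical and a `probcons` binary is assumed to be on the PATH:

```python
import subprocess

# PROBCONS writes the alignment to stdout, so capture it and
# save it to a file for later parsing with Bio.AlignIO.
result = subprocess.run(
    ["probcons", "-clustalw", "unaligned.fasta"],
    capture_output=True,
    text=True,
    check=True,
)
with open("aligned.aln", "w") as handle:
    handle.write(result.stdout)
```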
diff --git a/code/lib/Bio/Align/Applications/_TCoffee.py b/code/lib/Bio/Align/Applications/_TCoffee.py
new file mode 100644
index 0000000..de337bc
--- /dev/null
+++ b/code/lib/Bio/Align/Applications/_TCoffee.py
@@ -0,0 +1,125 @@
+# Copyright 2009 by Cymon J. Cox and Brad Chapman. All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Command line wrapper for the multiple alignment program TCOFFEE."""
+
+
+from Bio.Application import _Option, _Switch, AbstractCommandline
+
+
+class TCoffeeCommandline(AbstractCommandline):
+    """Commandline object for the TCoffee alignment program.
+
+    http://www.tcoffee.org/Projects_home_page/t_coffee_home_page.html
+
+    The T-Coffee command line tool has a lot of switches and options.
+    This wrapper implements a VERY limited number of options - if you
+    would like to help improve it please get in touch.
+
+    Notes
+    -----
+    Last checked against: Version_6.92
+
+    References
+    ----------
+    T-Coffee: A novel method for multiple sequence alignments.
+    Notredame, Higgins, Heringa, JMB, 302(205-217) 2000
+
+    Examples
+    --------
+    To align a FASTA file (unaligned.fasta) with the output in ClustalW
+    format (file aligned.aln), and otherwise default settings, use:
+
+    >>> from Bio.Align.Applications import TCoffeeCommandline
+    >>> tcoffee_cline = TCoffeeCommandline(infile="unaligned.fasta",
+    ...                                    output="clustalw",
+    ...                                    outfile="aligned.aln")
+    >>> print(tcoffee_cline)
+    t_coffee -output clustalw -infile unaligned.fasta -outfile aligned.aln
+
+    You would typically run the command line with tcoffee_cline() or via
+    the Python subprocess module, as described in the Biopython tutorial.
+
+    """
+
+    SEQ_TYPES = ["dna", "protein", "dna_protein"]
+
+    def __init__(self, cmd="t_coffee", **kwargs):
+        """Initialize the class."""
+        self.parameters = [
+            _Option(
+                ["-output", "output"],
+                """Specify the output type.
+
+                One (or more separated by a comma) of:
+                'clustalw_aln', 'clustalw', 'gcg', 'msf_aln',
+                'pir_aln', 'fasta_aln', 'phylip', 'pir_seq', 'fasta_seq'
+                """,
+                equate=False,
+            ),
+            _Option(
+                ["-infile", "infile"],
+                "Specify the input file.",
+                filename=True,
+                is_required=True,
+                equate=False,
+            ),
+            # Indicates the name of the alignment output by t_coffee. If the
+            # default is used, the alignment is named <your sequences>.aln
+            _Option(
+                ["-outfile", "outfile"],
+                "Specify the output file. Default: <your sequences>.aln",
+                filename=True,
+                equate=False,
+            ),
+            _Switch(
+                ["-convert", "convert"], "Specify you want to perform a file conversion"
+            ),
+            _Option(
+                ["-type", "type"],
+                "Specify the type of sequence being aligned",
+                checker_function=lambda x: x in self.SEQ_TYPES,
+                equate=False,
+            ),
+            _Option(
+                ["-outorder", "outorder"],
+                "Specify the order of sequence to output. "
+                "Either 'input', 'aligned' or <filename> of "
+                "Fasta file with sequence order",
+                equate=False,
+            ),
+            _Option(
+                ["-matrix", "matrix"],
+                "Specify the filename of the substitution matrix to use. 
" + "Default: blosum62mt", + equate=False, + ), + _Option( + ["-gapopen", "gapopen"], + "Indicates the penalty applied for opening a gap (negative integer)", + checker_function=lambda x: isinstance(x, int), + equate=False, + ), + _Option( + ["-gapext", "gapext"], + "Indicates the penalty applied for extending a gap (negative integer)", + checker_function=lambda x: isinstance(x, int), + equate=False, + ), + _Switch(["-quiet", "quiet"], "Turn off log output"), + _Option( + ["-mode", "mode"], + "Specifies a special mode: genome, quickaln, dali, 3dcoffee", + equate=False, + ), + ] + AbstractCommandline.__init__(self, cmd, **kwargs) + + +if __name__ == "__main__": + from Bio._utils import run_doctest + + run_doctest() diff --git a/code/lib/Bio/Align/Applications/__init__.py b/code/lib/Bio/Align/Applications/__init__.py new file mode 100644 index 0000000..778a7dd --- /dev/null +++ b/code/lib/Bio/Align/Applications/__init__.py @@ -0,0 +1,34 @@ +# Copyright 2009 by Peter Cock & Cymon J. Cox. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +"""Alignment command line tool wrappers (OBSOLETE). + +We have decided to remove this module in future, and instead recommend +building your command and invoking it via the subprocess module directly. +""" + +from ._Muscle import MuscleCommandline +from ._Clustalw import ClustalwCommandline +from ._ClustalOmega import ClustalOmegaCommandline +from ._Prank import PrankCommandline +from ._Mafft import MafftCommandline +from ._Dialign import DialignCommandline +from ._Probcons import ProbconsCommandline +from ._TCoffee import TCoffeeCommandline +from ._MSAProbs import MSAProbsCommandline + +# Make this explicit, then they show up in the API docs +__all__ = ( + "MuscleCommandline", + "ClustalwCommandline", + "ClustalOmegaCommandline", + "PrankCommandline", + "MafftCommandline", + "DialignCommandline", + "ProbconsCommandline", + "TCoffeeCommandline", + "MSAProbsCommandline", +) diff --git a/code/lib/Bio/Align/Applications/__pycache__/_ClustalOmega.cpython-37.pyc b/code/lib/Bio/Align/Applications/__pycache__/_ClustalOmega.cpython-37.pyc new file mode 100644 index 0000000..047e02a Binary files /dev/null and b/code/lib/Bio/Align/Applications/__pycache__/_ClustalOmega.cpython-37.pyc differ diff --git a/code/lib/Bio/Align/Applications/__pycache__/_Clustalw.cpython-37.pyc b/code/lib/Bio/Align/Applications/__pycache__/_Clustalw.cpython-37.pyc new file mode 100644 index 0000000..5cfed83 Binary files /dev/null and b/code/lib/Bio/Align/Applications/__pycache__/_Clustalw.cpython-37.pyc differ diff --git a/code/lib/Bio/Align/Applications/__pycache__/_Dialign.cpython-37.pyc b/code/lib/Bio/Align/Applications/__pycache__/_Dialign.cpython-37.pyc new file mode 100644 index 0000000..48be4d1 Binary files /dev/null and b/code/lib/Bio/Align/Applications/__pycache__/_Dialign.cpython-37.pyc differ diff --git a/code/lib/Bio/Align/Applications/__pycache__/_MSAProbs.cpython-37.pyc b/code/lib/Bio/Align/Applications/__pycache__/_MSAProbs.cpython-37.pyc new file mode 100644 index 0000000..9a47d9c Binary files /dev/null and b/code/lib/Bio/Align/Applications/__pycache__/_MSAProbs.cpython-37.pyc differ diff --git a/code/lib/Bio/Align/Applications/__pycache__/_Mafft.cpython-37.pyc b/code/lib/Bio/Align/Applications/__pycache__/_Mafft.cpython-37.pyc new file mode 100644 index 
0000000..d7dc9b7 Binary files /dev/null and b/code/lib/Bio/Align/Applications/__pycache__/_Mafft.cpython-37.pyc differ diff --git a/code/lib/Bio/Align/Applications/__pycache__/_Muscle.cpython-37.pyc b/code/lib/Bio/Align/Applications/__pycache__/_Muscle.cpython-37.pyc new file mode 100644 index 0000000..1fc62ff Binary files /dev/null and b/code/lib/Bio/Align/Applications/__pycache__/_Muscle.cpython-37.pyc differ diff --git a/code/lib/Bio/Align/Applications/__pycache__/_Prank.cpython-37.pyc b/code/lib/Bio/Align/Applications/__pycache__/_Prank.cpython-37.pyc new file mode 100644 index 0000000..191a273 Binary files /dev/null and b/code/lib/Bio/Align/Applications/__pycache__/_Prank.cpython-37.pyc differ diff --git a/code/lib/Bio/Align/Applications/__pycache__/_Probcons.cpython-37.pyc b/code/lib/Bio/Align/Applications/__pycache__/_Probcons.cpython-37.pyc new file mode 100644 index 0000000..a0b18ca Binary files /dev/null and b/code/lib/Bio/Align/Applications/__pycache__/_Probcons.cpython-37.pyc differ diff --git a/code/lib/Bio/Align/Applications/__pycache__/_TCoffee.cpython-37.pyc b/code/lib/Bio/Align/Applications/__pycache__/_TCoffee.cpython-37.pyc new file mode 100644 index 0000000..a2f271d Binary files /dev/null and b/code/lib/Bio/Align/Applications/__pycache__/_TCoffee.cpython-37.pyc differ diff --git a/code/lib/Bio/Align/Applications/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/Align/Applications/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..5912300 Binary files /dev/null and b/code/lib/Bio/Align/Applications/__pycache__/__init__.cpython-37.pyc differ diff --git a/code/lib/Bio/Align/__init__.py b/code/lib/Bio/Align/__init__.py new file mode 100644 index 0000000..ac5b1cd --- /dev/null +++ b/code/lib/Bio/Align/__init__.py @@ -0,0 +1,2326 @@ +# Copyright 2000, 2004 by Brad Chapman. +# Revisions copyright 2010-2013, 2015-2018 by Peter Cock. +# All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +"""Code for dealing with sequence alignments. + +One of the most important things in this module is the MultipleSeqAlignment +class, used in the Bio.AlignIO module. + +""" + +import sys + +from Bio.Align import _aligners +from Bio.Align import substitution_matrices +from Bio.Seq import Seq, MutableSeq, reverse_complement, UndefinedSequenceError +from Bio.SeqRecord import SeqRecord, _RestrictedDict + +# Import errors may occur here if a compiled aligners.c file +# (_aligners.pyd or _aligners.so) is missing or if the user is +# importing from within the Biopython source tree, see PR #2007: +# https://github.com/biopython/biopython/pull/2007 + + +class MultipleSeqAlignment: + """Represents a classical multiple sequence alignment (MSA). + + By this we mean a collection of sequences (usually shown as rows) which + are all the same length (usually with gap characters for insertions or + padding). The data can then be regarded as a matrix of letters, with well + defined columns. 
+ + You would typically create an MSA by loading an alignment file with the + AlignIO module: + + >>> from Bio import AlignIO + >>> align = AlignIO.read("Clustalw/opuntia.aln", "clustal") + >>> print(align) + Alignment with 7 rows and 156 columns + TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAG...AGA gi|6273285|gb|AF191659.1|AF191 + TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAG...AGA gi|6273284|gb|AF191658.1|AF191 + TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAG...AGA gi|6273287|gb|AF191661.1|AF191 + TATACATAAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAG...AGA gi|6273286|gb|AF191660.1|AF191 + TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAG...AGA gi|6273290|gb|AF191664.1|AF191 + TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAG...AGA gi|6273289|gb|AF191663.1|AF191 + TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAG...AGA gi|6273291|gb|AF191665.1|AF191 + + In some respects you can treat these objects as lists of SeqRecord objects, + each representing a row of the alignment. Iterating over an alignment gives + the SeqRecord object for each row: + + >>> len(align) + 7 + >>> for record in align: + ... print("%s %i" % (record.id, len(record))) + ... + gi|6273285|gb|AF191659.1|AF191 156 + gi|6273284|gb|AF191658.1|AF191 156 + gi|6273287|gb|AF191661.1|AF191 156 + gi|6273286|gb|AF191660.1|AF191 156 + gi|6273290|gb|AF191664.1|AF191 156 + gi|6273289|gb|AF191663.1|AF191 156 + gi|6273291|gb|AF191665.1|AF191 156 + + You can also access individual rows as SeqRecord objects via their index: + + >>> print(align[0].id) + gi|6273285|gb|AF191659.1|AF191 + >>> print(align[-1].id) + gi|6273291|gb|AF191665.1|AF191 + + And extract columns as strings: + + >>> print(align[:, 1]) + AAAAAAA + + Or, take just the first ten columns as a sub-alignment: + + >>> print(align[:, :10]) + Alignment with 7 rows and 10 columns + TATACATTAA gi|6273285|gb|AF191659.1|AF191 + TATACATTAA gi|6273284|gb|AF191658.1|AF191 + TATACATTAA gi|6273287|gb|AF191661.1|AF191 + TATACATAAA gi|6273286|gb|AF191660.1|AF191 + TATACATTAA gi|6273290|gb|AF191664.1|AF191 + TATACATTAA gi|6273289|gb|AF191663.1|AF191 + TATACATTAA gi|6273291|gb|AF191665.1|AF191 + + Combining this alignment slicing with alignment addition allows you to + remove a section of the alignment. For example, taking just the first + and last ten columns: + + >>> print(align[:, :10] + align[:, -10:]) + Alignment with 7 rows and 20 columns + TATACATTAAGTGTACCAGA gi|6273285|gb|AF191659.1|AF191 + TATACATTAAGTGTACCAGA gi|6273284|gb|AF191658.1|AF191 + TATACATTAAGTGTACCAGA gi|6273287|gb|AF191661.1|AF191 + TATACATAAAGTGTACCAGA gi|6273286|gb|AF191660.1|AF191 + TATACATTAAGTGTACCAGA gi|6273290|gb|AF191664.1|AF191 + TATACATTAAGTATACCAGA gi|6273289|gb|AF191663.1|AF191 + TATACATTAAGTGTACCAGA gi|6273291|gb|AF191665.1|AF191 + + Note - This object replaced the older Alignment object defined in module + Bio.Align.Generic but is not fully backwards compatible with it. + + Note - This object does NOT attempt to model the kind of alignments used + in next generation sequencing with multiple sequencing reads which are + much shorter than the alignment, and where there is usually a consensus or + reference sequence with special status. + """ + + def __init__( + self, records, alphabet=None, annotations=None, column_annotations=None + ): + """Initialize a new MultipleSeqAlignment object. + + Arguments: + - records - A list (or iterator) of SeqRecord objects, whose + sequences are all the same length. This may be an be an + empty list. + - alphabet - For backward compatibility only; its value should always + be None. 
+ - annotations - Information about the whole alignment (dictionary). + - column_annotations - Per column annotation (restricted dictionary). + This holds Python sequences (lists, strings, tuples) + whose length matches the number of columns. A typical + use would be a secondary structure consensus string. + + You would normally load a MSA from a file using Bio.AlignIO, but you + can do this from a list of SeqRecord objects too: + + >>> from Bio.Seq import Seq + >>> from Bio.SeqRecord import SeqRecord + >>> from Bio.Align import MultipleSeqAlignment + >>> a = SeqRecord(Seq("AAAACGT"), id="Alpha") + >>> b = SeqRecord(Seq("AAA-CGT"), id="Beta") + >>> c = SeqRecord(Seq("AAAAGGT"), id="Gamma") + >>> align = MultipleSeqAlignment([a, b, c], + ... annotations={"tool": "demo"}, + ... column_annotations={"stats": "CCCXCCC"}) + >>> print(align) + Alignment with 3 rows and 7 columns + AAAACGT Alpha + AAA-CGT Beta + AAAAGGT Gamma + >>> align.annotations + {'tool': 'demo'} + >>> align.column_annotations + {'stats': 'CCCXCCC'} + """ + if alphabet is not None: + raise ValueError("The alphabet argument is no longer supported") + + self._records = [] + if records: + self.extend(records) + + # Annotations about the whole alignment + if annotations is None: + annotations = {} + elif not isinstance(annotations, dict): + raise TypeError("annotations argument should be a dict") + self.annotations = annotations + + # Annotations about each column of the alignment + if column_annotations is None: + column_annotations = {} + # Handle this via the property set function which will validate it + self.column_annotations = column_annotations + + def _set_per_column_annotations(self, value): + if not isinstance(value, dict): + raise TypeError( + "The per-column-annotations should be a (restricted) dictionary." + ) + # Turn this into a restricted-dictionary (and check the entries) + if len(self): + # Use the standard method to get the length + expected_length = self.get_alignment_length() + self._per_col_annotations = _RestrictedDict(length=expected_length) + self._per_col_annotations.update(value) + else: + # Bit of a problem case... number of columns is undefined + self._per_col_annotations = None + if value: + raise ValueError( + "Can't set per-column-annotations without an alignment" + ) + + def _get_per_column_annotations(self): + if self._per_col_annotations is None: + # This happens if empty at initialisation + if len(self): + # Use the standard method to get the length + expected_length = self.get_alignment_length() + else: + # Should this raise an exception? Compare SeqRecord behaviour... + expected_length = 0 + self._per_col_annotations = _RestrictedDict(length=expected_length) + return self._per_col_annotations + + column_annotations = property( + fget=_get_per_column_annotations, + fset=_set_per_column_annotations, + doc="""Dictionary of per-letter-annotation for the sequence.""", + ) + + def _str_line(self, record, length=50): + """Return a truncated string representation of a SeqRecord (PRIVATE). + + This is a PRIVATE function used by the __str__ method. 
+ """ + if record.seq.__class__.__name__ == "CodonSeq": + if len(record.seq) <= length: + return "%s %s" % (record.seq, record.id) + else: + return "%s...%s %s" % ( + record.seq[: length - 3], + record.seq[-3:], + record.id, + ) + else: + if len(record.seq) <= length: + return "%s %s" % (record.seq, record.id) + else: + return "%s...%s %s" % ( + record.seq[: length - 6], + record.seq[-3:], + record.id, + ) + + def __str__(self): + """Return a multi-line string summary of the alignment. + + This output is intended to be readable, but large alignments are + shown truncated. A maximum of 20 rows (sequences) and 50 columns + are shown, with the record identifiers. This should fit nicely on a + single screen. e.g. + + >>> from Bio.Seq import Seq + >>> from Bio.SeqRecord import SeqRecord + >>> from Bio.Align import MultipleSeqAlignment + >>> a = SeqRecord(Seq("ACTGCTAGCTAG"), id="Alpha") + >>> b = SeqRecord(Seq("ACT-CTAGCTAG"), id="Beta") + >>> c = SeqRecord(Seq("ACTGCTAGATAG"), id="Gamma") + >>> align = MultipleSeqAlignment([a, b, c]) + >>> print(align) + Alignment with 3 rows and 12 columns + ACTGCTAGCTAG Alpha + ACT-CTAGCTAG Beta + ACTGCTAGATAG Gamma + + See also the alignment's format method. + """ + rows = len(self._records) + lines = [ + "Alignment with %i rows and %i columns" + % (rows, self.get_alignment_length()) + ] + if rows <= 20: + lines.extend(self._str_line(rec) for rec in self._records) + else: + lines.extend(self._str_line(rec) for rec in self._records[:18]) + lines.append("...") + lines.append(self._str_line(self._records[-1])) + return "\n".join(lines) + + def __repr__(self): + """Return a representation of the object for debugging. + + The representation cannot be used with eval() to recreate the object, + which is usually possible with simple python objects. For example: + + + + The hex string is the memory address of the object, see help(id). + This provides a simple way to visually distinguish alignments of + the same size. + """ + # A doctest for __repr__ would be nice, but __class__ comes out differently + # if run via the __main__ trick. + return "<%s instance (%i records of length %i) at %x>" % ( + self.__class__, + len(self._records), + self.get_alignment_length(), + id(self), + ) + # This version is useful for doing eval(repr(alignment)), + # but it can be VERY long: + # return "%s(%r)" \ + # % (self.__class__, self._records) + + def __format__(self, format_spec): + """Return the alignment as a string in the specified file format. + + The format should be a lower case string supported as an output + format by Bio.AlignIO (such as "fasta", "clustal", "phylip", + "stockholm", etc), which is used to turn the alignment into a + string. + + e.g. 
+ + >>> from Bio.Align import MultipleSeqAlignment + >>> a = SeqRecord(Seq("ACTGCTAGCTAG"), id="Alpha", description="") + >>> b = SeqRecord(Seq("ACT-CTAGCTAG"), id="Beta", description="") + >>> c = SeqRecord(Seq("ACTGCTAGATAG"), id="Gamma", description="") + >>> align = MultipleSeqAlignment([a, b, c]) + >>> print(format(align, "fasta")) + >Alpha + ACTGCTAGCTAG + >Beta + ACT-CTAGCTAG + >Gamma + ACTGCTAGATAG + + >>> print(format(align, "phylip")) + 3 12 + Alpha ACTGCTAGCT AG + Beta ACT-CTAGCT AG + Gamma ACTGCTAGAT AG + + """ + if format_spec: + from io import StringIO + from Bio import AlignIO + + handle = StringIO() + AlignIO.write([self], handle, format_spec) + return handle.getvalue() + else: + # Follow python convention and default to using __str__ + return str(self) + + def __iter__(self): + """Iterate over alignment rows as SeqRecord objects. + + e.g. + + >>> from Bio.Align import MultipleSeqAlignment + >>> a = SeqRecord(Seq("ACTGCTAGCTAG"), id="Alpha") + >>> b = SeqRecord(Seq("ACT-CTAGCTAG"), id="Beta") + >>> c = SeqRecord(Seq("ACTGCTAGATAG"), id="Gamma") + >>> align = MultipleSeqAlignment([a, b, c]) + >>> for record in align: + ... print(record.id) + ... print(record.seq) + ... + Alpha + ACTGCTAGCTAG + Beta + ACT-CTAGCTAG + Gamma + ACTGCTAGATAG + """ + return iter(self._records) + + def __len__(self): + """Return the number of sequences in the alignment. + + Use len(alignment) to get the number of sequences (i.e. the number of + rows), and alignment.get_alignment_length() to get the length of the + longest sequence (i.e. the number of columns). + + This is easy to remember if you think of the alignment as being like a + list of SeqRecord objects. + """ + return len(self._records) + + def get_alignment_length(self): + """Return the maximum length of the alignment. + + All objects in the alignment should (hopefully) have the same + length. This function will go through and find this length + by finding the maximum length of sequences in the alignment. + + >>> from Bio.Align import MultipleSeqAlignment + >>> a = SeqRecord(Seq("ACTGCTAGCTAG"), id="Alpha") + >>> b = SeqRecord(Seq("ACT-CTAGCTAG"), id="Beta") + >>> c = SeqRecord(Seq("ACTGCTAGATAG"), id="Gamma") + >>> align = MultipleSeqAlignment([a, b, c]) + >>> align.get_alignment_length() + 12 + + If you want to know the number of sequences in the alignment, + use len(align) instead: + + >>> len(align) + 3 + + """ + max_length = 0 + + for record in self._records: + if len(record.seq) > max_length: + max_length = len(record.seq) + + return max_length + + def extend(self, records): + """Add more SeqRecord objects to the alignment as rows. + + They must all have the same length as the original alignment. 
For + example, + + >>> from Bio.Seq import Seq + >>> from Bio.SeqRecord import SeqRecord + >>> from Bio.Align import MultipleSeqAlignment + >>> a = SeqRecord(Seq("AAAACGT"), id="Alpha") + >>> b = SeqRecord(Seq("AAA-CGT"), id="Beta") + >>> c = SeqRecord(Seq("AAAAGGT"), id="Gamma") + >>> d = SeqRecord(Seq("AAAACGT"), id="Delta") + >>> e = SeqRecord(Seq("AAA-GGT"), id="Epsilon") + + First we create a small alignment (three rows): + + >>> align = MultipleSeqAlignment([a, b, c]) + >>> print(align) + Alignment with 3 rows and 7 columns + AAAACGT Alpha + AAA-CGT Beta + AAAAGGT Gamma + + Now we can extend this alignment with another two rows: + + >>> align.extend([d, e]) + >>> print(align) + Alignment with 5 rows and 7 columns + AAAACGT Alpha + AAA-CGT Beta + AAAAGGT Gamma + AAAACGT Delta + AAA-GGT Epsilon + + Because the alignment object allows iteration over the rows as + SeqRecords, you can use the extend method with a second alignment + (provided its sequences have the same length as the original alignment). + """ + if len(self): + # Use the standard method to get the length + expected_length = self.get_alignment_length() + else: + # Take the first record's length + records = iter(records) # records arg could be list or iterator + try: + rec = next(records) + except StopIteration: + # Special case, no records + return + expected_length = len(rec) + self._append(rec, expected_length) + # Can now setup the per-column-annotations as well, set to None + # while missing the length: + self.column_annotations = {} + # Now continue to the rest of the records as usual + + for rec in records: + self._append(rec, expected_length) + + def append(self, record): + """Add one more SeqRecord object to the alignment as a new row. + + This must have the same length as the original alignment (unless this is + the first record). 
+ + >>> from Bio import AlignIO + >>> align = AlignIO.read("Clustalw/opuntia.aln", "clustal") + >>> print(align) + Alignment with 7 rows and 156 columns + TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAG...AGA gi|6273285|gb|AF191659.1|AF191 + TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAG...AGA gi|6273284|gb|AF191658.1|AF191 + TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAG...AGA gi|6273287|gb|AF191661.1|AF191 + TATACATAAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAG...AGA gi|6273286|gb|AF191660.1|AF191 + TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAG...AGA gi|6273290|gb|AF191664.1|AF191 + TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAG...AGA gi|6273289|gb|AF191663.1|AF191 + TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAG...AGA gi|6273291|gb|AF191665.1|AF191 + >>> len(align) + 7 + + We'll now construct a dummy record to append as an example: + + >>> from Bio.Seq import Seq + >>> from Bio.SeqRecord import SeqRecord + >>> dummy = SeqRecord(Seq("N"*156), id="dummy") + + Now append this to the alignment, + + >>> align.append(dummy) + >>> print(align) + Alignment with 8 rows and 156 columns + TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAG...AGA gi|6273285|gb|AF191659.1|AF191 + TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAG...AGA gi|6273284|gb|AF191658.1|AF191 + TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAG...AGA gi|6273287|gb|AF191661.1|AF191 + TATACATAAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAG...AGA gi|6273286|gb|AF191660.1|AF191 + TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAG...AGA gi|6273290|gb|AF191664.1|AF191 + TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAG...AGA gi|6273289|gb|AF191663.1|AF191 + TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAG...AGA gi|6273291|gb|AF191665.1|AF191 + NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN...NNN dummy + >>> len(align) + 8 + + """ + if self._records: + self._append(record, self.get_alignment_length()) + else: + self._append(record) + + def _append(self, record, expected_length=None): + """Validate and append a record (PRIVATE).""" + if not isinstance(record, SeqRecord): + raise TypeError("New sequence is not a SeqRecord object") + + # Currently the get_alignment_length() call is expensive, so we need + # to avoid calling it repeatedly for __init__ and extend, hence this + # private _append method + if expected_length is not None and len(record) != expected_length: + # TODO - Use the following more helpful error, but update unit tests + # raise ValueError("New sequence is not of length %i" + # % self.get_alignment_length()) + raise ValueError("Sequences must all be the same length") + + self._records.append(record) + + def __add__(self, other): + """Combine two alignments with the same number of rows by adding them. + + If you have two multiple sequence alignments (MSAs), there are two ways to think + about adding them - by row or by column. Using the extend method adds by row. + Using the addition operator adds by column. For example, + + >>> from Bio.Seq import Seq + >>> from Bio.SeqRecord import SeqRecord + >>> from Bio.Align import MultipleSeqAlignment + >>> a1 = SeqRecord(Seq("AAAAC"), id="Alpha") + >>> b1 = SeqRecord(Seq("AAA-C"), id="Beta") + >>> c1 = SeqRecord(Seq("AAAAG"), id="Gamma") + >>> a2 = SeqRecord(Seq("GT"), id="Alpha") + >>> b2 = SeqRecord(Seq("GT"), id="Beta") + >>> c2 = SeqRecord(Seq("GT"), id="Gamma") + >>> left = MultipleSeqAlignment([a1, b1, c1], + ... annotations={"tool": "demo", "name": "start"}, + ... column_annotations={"stats": "CCCXC"}) + >>> right = MultipleSeqAlignment([a2, b2, c2], + ... annotations={"tool": "demo", "name": "end"}, + ... 
column_annotations={"stats": "CC"})
+
+        Now, let's look at these two alignments:
+
+        >>> print(left)
+        Alignment with 3 rows and 5 columns
+        AAAAC Alpha
+        AAA-C Beta
+        AAAAG Gamma
+        >>> print(right)
+        Alignment with 3 rows and 2 columns
+        GT Alpha
+        GT Beta
+        GT Gamma
+
+        And add them:
+
+        >>> combined = left + right
+        >>> print(combined)
+        Alignment with 3 rows and 7 columns
+        AAAACGT Alpha
+        AAA-CGT Beta
+        AAAAGGT Gamma
+
+        For this to work, both alignments must have the same number of records (here
+        they both have 3 rows):
+
+        >>> len(left)
+        3
+        >>> len(right)
+        3
+        >>> len(combined)
+        3
+
+        The individual rows are SeqRecord objects, and these can be added together. Refer
+        to the SeqRecord documentation for details of how the annotation is handled. This
+        example is a special case in that both original alignments shared the same names,
+        meaning when the rows are added they also get the same name.
+
+        Any common annotations are preserved, but differing annotation is lost. This is
+        the same behaviour used in the SeqRecord annotations and is designed to prevent
+        accidental propagation of inappropriate values:
+
+        >>> combined.annotations
+        {'tool': 'demo'}
+
+        Similarly any common per-column-annotations are combined:
+
+        >>> combined.column_annotations
+        {'stats': 'CCCXCCC'}
+
+        """
+        if not isinstance(other, MultipleSeqAlignment):
+            raise NotImplementedError
+        if len(self) != len(other):
+            raise ValueError(
+                "When adding two alignments they must have the same length"
+                " (i.e. same number of rows)"
+            )
+        merged = (left + right for left, right in zip(self, other))
+        # Take any common annotation:
+        annotations = {}
+        for k, v in self.annotations.items():
+            if k in other.annotations and other.annotations[k] == v:
+                annotations[k] = v
+        column_annotations = {}
+        for k, v in self.column_annotations.items():
+            if k in other.column_annotations:
+                column_annotations[k] = v + other.column_annotations[k]
+        return MultipleSeqAlignment(
+            merged, annotations=annotations, column_annotations=column_annotations
+        )
+
+    def __getitem__(self, index):
+        """Access part of the alignment.
+
+        Depending on the indices, you can get a SeqRecord object
+        (representing a single row), a Seq object (for a single column),
+        a string (for a single character) or another alignment
+        (representing some part or all of the alignment).
+
+        align[r,c] gives a single character as a string
+        align[r] gives a row as a SeqRecord
+        align[r,:] gives a row as a SeqRecord
+        align[:,c] gives a column as a Seq
+
+        align[:] and align[:,:] give a copy of the alignment
+
+        Anything else gives a sub alignment, e.g.
+        align[0:2] or align[0:2,:] uses only row 0 and 1
+        align[:,1:3] uses only columns 1 and 2
+        align[0:2,1:3] uses only rows 0 & 1 and only cols 1 & 2
+
+        We'll use the following example alignment here for illustration:
+
+        >>> from Bio.Seq import Seq
+        >>> from Bio.SeqRecord import SeqRecord
+        >>> from Bio.Align import MultipleSeqAlignment
+        >>> a = SeqRecord(Seq("AAAACGT"), id="Alpha")
+        >>> b = SeqRecord(Seq("AAA-CGT"), id="Beta")
+        >>> c = SeqRecord(Seq("AAAAGGT"), id="Gamma")
+        >>> d = SeqRecord(Seq("AAAACGT"), id="Delta")
+        >>> e = SeqRecord(Seq("AAA-GGT"), id="Epsilon")
+        >>> align = MultipleSeqAlignment([a, b, c, d, e])
+
+        You can access a row of the alignment as a SeqRecord using an integer
+        index (think of the alignment as a list of SeqRecord objects here):
+
+        >>> first_record = align[0]
+        >>> print("%s %s" % (first_record.id, first_record.seq))
+        Alpha AAAACGT
+        >>> last_record = align[-1]
+        >>> print("%s %s" % (last_record.id, last_record.seq))
+        Epsilon AAA-GGT
+
+        You can also use python's slice notation to create a sub-alignment
+        containing only some of the SeqRecord objects:
+
+        >>> sub_alignment = align[2:5]
+        >>> print(sub_alignment)
+        Alignment with 3 rows and 7 columns
+        AAAAGGT Gamma
+        AAAACGT Delta
+        AAA-GGT Epsilon
+
+        This includes support for a step, i.e. align[start:end:step], which
+        can be used to select every second sequence:
+
+        >>> sub_alignment = align[::2]
+        >>> print(sub_alignment)
+        Alignment with 3 rows and 7 columns
+        AAAACGT Alpha
+        AAAAGGT Gamma
+        AAA-GGT Epsilon
+
+        Or to get a copy of the alignment with the rows in reverse order:
+
+        >>> rev_alignment = align[::-1]
+        >>> print(rev_alignment)
+        Alignment with 5 rows and 7 columns
+        AAA-GGT Epsilon
+        AAAACGT Delta
+        AAAAGGT Gamma
+        AAA-CGT Beta
+        AAAACGT Alpha
+
+        You can also use two indices to specify both rows and columns. Using simple
+        integers gives you the entry as a single character string. e.g.
+
+        >>> align[3, 4]
+        'C'
+
+        This is equivalent to:
+
+        >>> align[3][4]
+        'C'
+
+        or:
+
+        >>> align[3].seq[4]
+        'C'
+
+        To get a single column (as a string) use this syntax:
+
+        >>> align[:, 4]
+        'CCGCG'
+
+        Or, to get part of a column,
+
+        >>> align[1:3, 4]
+        'CG'
+
+        However, in general you get a sub-alignment,
+
+        >>> print(align[1:5, 3:6])
+        Alignment with 4 rows and 3 columns
+        -CG Beta
+        AGG Gamma
+        ACG Delta
+        -GG Epsilon
+
+        This should all seem familiar to anyone who has used the NumPy
+        array or matrix objects.
+        """
+        if isinstance(index, int):
+            # e.g. result = align[x]
+            # Return a SeqRecord
+            return self._records[index]
+        elif isinstance(index, slice):
+            # e.g. sub_align = align[i:j:k]
+            new = MultipleSeqAlignment(self._records[index])
+            if self.column_annotations and len(new) == len(self):
+                # All rows kept (although could have been reversed)
+                # Preserve the column annotations too,
+                for k, v in self.column_annotations.items():
+                    new.column_annotations[k] = v
+            return new
+        elif len(index) != 2:
+            raise TypeError("Invalid index type.")
+
+        # Handle double indexing
+        row_index, col_index = index
+        if isinstance(row_index, int):
+            # e.g. row_or_part_row = align[6, 1:4], gives a SeqRecord
+            return self._records[row_index][col_index]
+        elif isinstance(col_index, int):
+            # e.g. col_or_part_col = align[1:5, 6], gives a string
+            return "".join(rec[col_index] for rec in self._records[row_index])
+        else:
+            # e.g. sub_align = align[1:4, 5:7], gives another alignment
+            new = MultipleSeqAlignment(
+                rec[col_index] for rec in self._records[row_index]
+            )
+            if self.column_annotations and len(new) == len(self):
+                # All rows kept (although could have been reversed)
+                # Preserve the column annotations too,
+                for k, v in self.column_annotations.items():
+                    new.column_annotations[k] = v[col_index]
+            return new
+
+    def sort(self, key=None, reverse=False):
+        """Sort the rows (SeqRecord objects) of the alignment in place.
+
+        This sorts the rows alphabetically using the SeqRecord object id by
+        default. The sorting can be controlled by supplying a key function
+        which must map each SeqRecord to a sort value.
+
+        This is useful if you want to add two alignments which use the same
+        record identifiers, but in a different order. For example,
+
+        >>> from Bio.Seq import Seq
+        >>> from Bio.SeqRecord import SeqRecord
+        >>> from Bio.Align import MultipleSeqAlignment
+        >>> align1 = MultipleSeqAlignment([
+        ...     SeqRecord(Seq("ACGT"), id="Human"),
+        ...     SeqRecord(Seq("ACGG"), id="Mouse"),
+        ...     SeqRecord(Seq("ACGC"), id="Chicken"),
+        ... ])
+        >>> align2 = MultipleSeqAlignment([
+        ...     SeqRecord(Seq("CGGT"), id="Mouse"),
+        ...     SeqRecord(Seq("CGTT"), id="Human"),
+        ...     SeqRecord(Seq("CGCT"), id="Chicken"),
+        ... ])
+
+        If you simply try to add these without sorting, you get this:
+
+        >>> print(align1 + align2)
+        Alignment with 3 rows and 8 columns
+        ACGTCGGT <unknown id>
+        ACGGCGTT <unknown id>
+        ACGCCGCT Chicken
+
+        Consult the SeqRecord documentation which explains why you get a
+        default value when annotation like the identifier doesn't match up.
+        However, if we sort the alignments first, then add them we get the
+        desired result:
+
+        >>> align1.sort()
+        >>> align2.sort()
+        >>> print(align1 + align2)
+        Alignment with 3 rows and 8 columns
+        ACGCCGCT Chicken
+        ACGTCGTT Human
+        ACGGCGGT Mouse
+
+        As an example using a different sort order, you could sort on the
+        GC content of each sequence.
+
+        >>> from Bio.SeqUtils import GC
+        >>> print(align1)
+        Alignment with 3 rows and 4 columns
+        ACGC Chicken
+        ACGT Human
+        ACGG Mouse
+        >>> align1.sort(key = lambda record: GC(record.seq))
+        >>> print(align1)
+        Alignment with 3 rows and 4 columns
+        ACGT Human
+        ACGC Chicken
+        ACGG Mouse
+
+        There is also a reverse argument, so if you wanted to sort by ID
+        but backwards:
+
+        >>> align1.sort(reverse=True)
+        >>> print(align1)
+        Alignment with 3 rows and 4 columns
+        ACGG Mouse
+        ACGT Human
+        ACGC Chicken
+
+        """
+        if key is None:
+            self._records.sort(key=lambda r: r.id, reverse=reverse)
+        else:
+            self._records.sort(key=key, reverse=reverse)
+
+    @property
+    def substitutions(self):
+        """Return an Array with the number of substitutions of letters in the alignment.
+
+        As an example, consider a multiple sequence alignment of four DNA sequences:
+
+        >>> from Bio.Seq import Seq
+        >>> from Bio.SeqRecord import SeqRecord
+        >>> from Bio.Align import MultipleSeqAlignment
+        >>> seq1 = SeqRecord(Seq("ACGT"), id="seq1")
+        >>> seq2 = SeqRecord(Seq("A--A"), id="seq2")
+        >>> seq3 = SeqRecord(Seq("ACGT"), id="seq3")
+        >>> seq4 = SeqRecord(Seq("TTTC"), id="seq4")
+        >>> alignment = MultipleSeqAlignment([seq1, seq2, seq3, seq4])
+        >>> print(alignment)
+        Alignment with 4 rows and 4 columns
+        ACGT seq1
+        A--A seq2
+        ACGT seq3
+        TTTC seq4
+
+        >>> m = alignment.substitutions
+        >>> print(m)
+            A   C   G   T
+        A 3.0 0.5 0.0 2.5
+        C 0.5 1.0 0.0 2.0
+        G 0.0 0.0 1.0 1.0
+        T 2.5 2.0 1.0 1.0
+        <BLANKLINE>
+
+        Note that the matrix is symmetric, with counts divided equally on both
+        sides of the diagonal. For example, the total number of substitutions
+        between A and T in the alignment is 2.5 + 2.5 = 5.
+
+        Any weights associated with the sequences are taken into account when
+        calculating the substitution matrix. For example, given the following
+        multiple sequence alignment::
+
+            GTATC 0.5
+            AT--C 0.8
+            CTGTC 1.0
+
+        For the first column we have::
+
+            ('A', 'G') : 0.5 * 0.8 = 0.4
+            ('C', 'G') : 0.5 * 1.0 = 0.5
+            ('A', 'C') : 0.8 * 1.0 = 0.8
+
+        """
+        letters = set.union(*[set(record.seq) for record in self])
+        try:
+            letters.remove("-")
+        except KeyError:
+            pass
+        letters = "".join(sorted(letters))
+        m = substitution_matrices.Array(letters, dims=2)
+        for rec_num1, alignment1 in enumerate(self):
+            seq1 = alignment1.seq
+            weight1 = alignment1.annotations.get("weight", 1.0)
+            for rec_num2, alignment2 in enumerate(self):
+                if rec_num1 == rec_num2:
+                    break
+                seq2 = alignment2.seq
+                weight2 = alignment2.annotations.get("weight", 1.0)
+                for residue1, residue2 in zip(seq1, seq2):
+                    if residue1 == "-":
+                        continue
+                    if residue2 == "-":
+                        continue
+                    m[(residue1, residue2)] += weight1 * weight2
+
+        m += m.transpose()
+        m /= 2.0
+
+        return m
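The weighted counting implemented above can be exercised directly. A minimal sketch mirroring the docstring's weighted example (the record ids are illustrative):

```python
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.Align import MultipleSeqAlignment

# substitutions reads each record's weight from
# record.annotations["weight"], defaulting to 1.0.
records = [
    SeqRecord(Seq("GTATC"), id="s1", annotations={"weight": 0.5}),
    SeqRecord(Seq("AT--C"), id="s2", annotations={"weight": 0.8}),
    SeqRecord(Seq("CTGTC"), id="s3", annotations={"weight": 1.0}),
]
alignment = MultipleSeqAlignment(records)
print(alignment.substitutions)  # symmetric matrix of weighted pair counts
```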
+
+
+class PairwiseAlignment:
+    """Represents a pairwise sequence alignment.
+
+    Internally, the pairwise alignment is stored as the path through
+    the traceback matrix, i.e. a tuple of pairs of indices corresponding
+    to the vertices of the path in the traceback matrix.
+    """
+
+    def __init__(self, target, query, path, score):
+        """Initialize a new PairwiseAlignment object.
+
+        Arguments:
+         - target - The first sequence, as a plain string, without gaps.
+         - query - The second sequence, as a plain string, without gaps.
+         - path - The path through the traceback matrix, defining an
+           alignment.
+         - score - The alignment score.
+
+        You would normally obtain a PairwiseAlignment object by iterating
+        over a PairwiseAlignments object.
+        """
+        self.target = target
+        self.query = query
+        self.score = score
+        self.path = path
+
+    def __eq__(self, other):
+        return self.path == other.path
+
+    def __ne__(self, other):
+        return self.path != other.path
+
+    def __lt__(self, other):
+        return self.path < other.path
+
+    def __le__(self, other):
+        return self.path <= other.path
+
+    def __gt__(self, other):
+        return self.path > other.path
+
+    def __ge__(self, other):
+        return self.path >= other.path
+
+    def __getitem__(self, key):
+        """Return self[key].
+
+        Currently, this is implemented only for indices of the form
+
+        self[:, :]
+
+        which returns a copy of the PairwiseAlignment object, and
+
+        self[:, i:]
+        self[:, :j]
+        self[:, i:j]
+
+        which returns a new PairwiseAlignment object spanning the indicated
+        columns.
+ + >>> from Bio.Align import PairwiseAligner + >>> aligner = PairwiseAligner() + >>> alignments = aligner.align("ACCGGTTT", "ACGGGTT") + >>> alignment = alignments[0] + >>> print(alignment) + ACCGG-TTT + ||-||-||- + AC-GGGTT- + + >>> alignment[:, 1:] # doctest:+ELLIPSIS + + >>> print(alignment[:, 1:]) + ACCGG-TTT + |-||-||- + AC-GGGTT- + + >>> print(alignment[:, 2:]) + ACCGG-TTT + -||-||- + AC-GGGTT- + + >>> print(alignment[:, 3:]) + ACCGG-TTT + ||-||- + ACGGGTT- + + >>> print(alignment[:, 3:-1]) + ACCGG-TTT + ||-|| + ACGGGTT + + """ + if isinstance(key, slice): + if key.indices(len(self)) == (0, 2, 1): + target = self.target + query = self.query + path = self.path + score = self.score + return PairwiseAlignment(target, query, path, score) + raise NotImplementedError + if isinstance(key, int): + raise NotImplementedError + if isinstance(key, tuple): + try: + row, col = key + except ValueError: + raise ValueError("only tuples of length 2 can be alignment indices") + if isinstance(row, int): + raise NotImplementedError + if isinstance(row, slice): + if row.indices(len(self)) != (0, 2, 1): + raise NotImplementedError + if isinstance(col, int): + raise NotImplementedError + if isinstance(col, slice): + n, m = self.shape + start_index, stop_index, step = col.indices(m) + if step != 1: + raise NotImplementedError + path = [] + index = 0 + path_iterator = iter(self.path) + starts = next(path_iterator) + for ends in path_iterator: + index += max(e - s for s, e in zip(starts, ends)) + if start_index < index: + offset = index - start_index + point = tuple( + e - offset if s < e else s for s, e in zip(starts, ends) + ) + path.append(point) + break + starts = ends + while True: + if stop_index <= index: + offset = index - stop_index + point = tuple( + e - offset if s < e else s for s, e in zip(starts, ends) + ) + path.append(point) + break + path.append(ends) + starts = ends + ends = next(path_iterator) + index += max(e - s for s, e in zip(starts, ends)) + path = tuple(path) + target = self.target + query = self.query + if path == self.path: + score = self.score + else: + score = None + return PairwiseAlignment(target, query, path, score) + raise TypeError("second index must be an integer or slice") + raise TypeError("first index must be an integer or slice") + raise TypeError("alignment indices must be integers, slices, or tuples") + + def _convert_sequence_string(self, sequence): + if isinstance(sequence, (bytes, bytearray)): + return sequence.decode() + if isinstance(sequence, str): + return sequence + if isinstance(sequence, Seq): + return str(sequence) + try: # check if target is a SeqRecord + sequence = sequence.seq + except AttributeError: + pass + else: + return str(sequence) + try: + view = memoryview(sequence) + except TypeError: + pass + else: + if view.format == "c": + return str(sequence) + return None + + def __format__(self, format_spec): + return self.format(format_spec) + + def format(self, fmt="", **kwargs): + """Return the alignment as a string in the specified file format. + + Arguments: + - fmt - File format. Acceptable values are + "" : create a human-readable representation of the + alignment (default); + "BED": create a line representing the alignment in + the Browser Extensible Data (BED) file format; + "PSL": create a line representing the alignment in + the Pattern Space Layout (PSL) file format as + generated by BLAT; + "SAM": create a line representing the alignment in + the Sequence Alignment/Map (SAM) format. + - mask - PSL format only. 
Specify if repeat regions in the target + sequence are masked and should be reported in the + `repMatches` field of the PSL file instead of in the + `matches` field. Acceptable values are + None : no masking (default); + "lower": masking by lower-case characters; + "upper": masking by upper-case characters. + - wildcard - PSL format only. Report alignments to the wildcard + character in the target or query sequence in the + `nCount` field of the PSL file instead of in the + `matches`, `misMatches`, or `repMatches` fields. + Default value is 'N'. + """ + if fmt == "": + return self._format_pretty(**kwargs) + elif fmt == "psl": + return self._format_psl(**kwargs) + elif fmt == "bed": + return self._format_bed(**kwargs) + elif fmt == "sam": + return self._format_sam(**kwargs) + else: + raise ValueError("Unknown format %s" % fmt) + + def _format_pretty(self): + seq1 = self._convert_sequence_string(self.target) + if seq1 is None: + return self._format_generalized() + seq2 = self._convert_sequence_string(self.query) + if seq2 is None: + return self._format_generalized() + n1 = len(seq1) + n2 = len(seq2) + aligned_seq1 = "" + aligned_seq2 = "" + pattern = "" + path = self.path + if path[0][1] > path[-1][1]: # mapped to reverse strand + path = tuple((c1, n2 - c2) for (c1, c2) in path) + seq2 = reverse_complement(seq2) + end1, end2 = path[0] + if end1 > 0 or end2 > 0: + end = max(end1, end2) + aligned_seq1 += " " * (end - end1) + seq1[:end1] + aligned_seq2 += " " * (end - end2) + seq2[:end2] + pattern += " " * end + start1 = end1 + start2 = end2 + for end1, end2 in path[1:]: + if end1 == start1: + gap = end2 - start2 + aligned_seq1 += "-" * gap + aligned_seq2 += seq2[start2:end2] + pattern += "-" * gap + elif end2 == start2: + gap = end1 - start1 + aligned_seq1 += seq1[start1:end1] + aligned_seq2 += "-" * gap + pattern += "-" * gap + else: + s1 = seq1[start1:end1] + s2 = seq2[start2:end2] + aligned_seq1 += s1 + aligned_seq2 += s2 + for c1, c2 in zip(s1, s2): + if c1 == c2: + pattern += "|" + else: + pattern += "." + start1 = end1 + start2 = end2 + aligned_seq1 += seq1[end1:] + aligned_seq2 += seq2[end2:] + return "%s\n%s\n%s\n" % (aligned_seq1, pattern, aligned_seq2) + + def _format_generalized(self): + seq1 = self.target + seq2 = self.query + aligned_seq1 = [] + aligned_seq2 = [] + pattern = [] + path = self.path + end1, end2 = path[0] + if end1 > 0 or end2 > 0: + if end1 <= end2: + for c2 in seq2[: end2 - end1]: + s2 = str(c2) + s1 = " " * len(s2) + aligned_seq1.append(s1) + aligned_seq2.append(s2) + pattern.append(s1) + else: # end1 > end2 + for c1 in seq1[: end1 - end2]: + s1 = str(c1) + s2 = " " * len(s1) + aligned_seq1.append(s1) + aligned_seq2.append(s2) + pattern.append(s2) + start1 = end1 + start2 = end2 + for end1, end2 in path[1:]: + if end1 == start1: + for c2 in seq2[start2:end2]: + s2 = str(c2) + s1 = "-" * len(s2) + aligned_seq1.append(s1) + aligned_seq2.append(s2) + pattern.append(s1) + start2 = end2 + elif end2 == start2: + for c1 in seq1[start1:end1]: + s1 = str(c1) + s2 = "-" * len(s1) + aligned_seq1.append(s1) + aligned_seq2.append(s2) + pattern.append(s2) + start1 = end1 + else: + for c1, c2 in zip(seq1[start1:end1], seq2[start2:end2]): + s1 = str(c1) + s2 = str(c2) + m1 = len(s1) + m2 = len(s2) + if c1 == c2: + p = "|" + else: + p = "." 
+ if m1 < m2: + space = (m2 - m1) * " " + s1 += space + pattern.append(p * m1 + space) + elif m1 > m2: + space = (m1 - m2) * " " + s2 += space + pattern.append(p * m2 + space) + else: + pattern.append(p * m1) + aligned_seq1.append(s1) + aligned_seq2.append(s2) + start1 = end1 + start2 = end2 + aligned_seq1 = " ".join(aligned_seq1) + aligned_seq2 = " ".join(aligned_seq2) + pattern = " ".join(pattern) + return "%s\n%s\n%s\n" % (aligned_seq1, pattern, aligned_seq2) + + def _format_bed(self): + query = self.query + target = self.target + # variable names follow those in the BED file format specification + try: + chrom = target.id + except AttributeError: + chrom = "target" + try: + name = query.id + except AttributeError: + name = "query" + path = self.path + if path[0][1] < path[-1][1]: # mapped to forward strand + strand = "+" + else: # mapped to reverse strand + strand = "-" + n2 = len(query) + path = tuple((c1, n2 - c2) for (c1, c2) in path) + score = self.score + blockSizes = [] + tStarts = [] + tStart, qStart = path[0] + for tEnd, qEnd in path[1:]: + tCount = tEnd - tStart + qCount = qEnd - qStart + if tCount == 0: + qStart = qEnd + elif qCount == 0: + tStart = tEnd + else: + assert tCount == qCount + tStarts.append(tStart) + blockSizes.append(tCount) + tStart = tEnd + qStart = qEnd + chromStart = tStarts[0] + chromEnd = tStarts[-1] + blockSizes[-1] + blockStarts = [tStart - chromStart for tStart in tStarts] + blockCount = len(blockSizes) + blockSizes = ",".join(map(str, blockSizes)) + "," + blockStarts = ",".join(map(str, blockStarts)) + "," + thickStart = chromStart + thickEnd = chromEnd + itemRgb = "0" + words = [ + chrom, + str(chromStart), + str(chromEnd), + name, + str(score), + strand, + str(thickStart), + str(thickEnd), + itemRgb, + str(blockCount), + blockSizes, + blockStarts, + ] + line = "\t".join(words) + "\n" + return line + + def _format_psl(self, mask=False, wildcard="N"): + path = self.path + if not path: # alignment consists of gaps only + return "" + query = self.query + target = self.target + try: + qName = query.id + except AttributeError: + qName = "query" + try: + query = query.seq + except AttributeError: + pass + try: + tName = target.id + except AttributeError: + tName = "target" + try: + target = target.seq + except AttributeError: + pass + n1 = len(target) + n2 = len(query) + try: + seq1 = bytes(target) + except TypeError: # string + seq1 = bytes(target, "ASCII") + except UndefinedSequenceError: # sequence contents is unknown + seq1 = None + if path[0][1] < path[-1][1]: # mapped to forward strand + strand = "+" + seq2 = query + else: # mapped to reverse strand + strand = "-" + seq2 = reverse_complement(query) + path = tuple((c1, n2 - c2) for (c1, c2) in path) + try: + seq2 = bytes(seq2) + except TypeError: # string + seq2 = bytes(seq2, "ASCII") + except UndefinedSequenceError: # sequence contents is unknown + seq2 = None + if wildcard is not None: + if mask == "upper": + wildcard = ord(wildcard.lower()) + else: + wildcard = ord(wildcard.upper()) + # variable names follow those in the PSL file format specification + matches = 0 + misMatches = 0 + repMatches = 0 + nCount = 0 + qNumInsert = 0 + qBaseInsert = 0 + tNumInsert = 0 + tBaseInsert = 0 + qSize = n2 + tSize = n1 + blockSizes = [] + qStarts = [] + tStarts = [] + tStart, qStart = path[0] + for tEnd, qEnd in path[1:]: + tCount = tEnd - tStart + qCount = qEnd - qStart + if tCount == 0: + if qStart > 0 and qEnd < qSize: + qNumInsert += 1 + qBaseInsert += qCount + qStart = qEnd + elif qCount == 0: + if tStart > 
0 and tEnd < tSize: + tNumInsert += 1 + tBaseInsert += tCount + tStart = tEnd + else: + assert tCount == qCount + tStarts.append(tStart) + qStarts.append(qStart) + blockSizes.append(tCount) + if seq1 is None or seq2 is None: + # contents of at least one sequence is unknown; + # count all alignments as matches: + matches += tCount + else: + s1 = seq1[tStart:tEnd] + s2 = seq2[qStart:qEnd] + if mask == "lower": + for u1, u2, c1 in zip(s1.upper(), s2.upper(), s1): + if u1 == wildcard or u2 == wildcard: + nCount += 1 + elif u1 == u2: + if u1 == c1: + matches += 1 + else: + repMatches += 1 + else: + misMatches += 1 + elif mask == "upper": + for u1, u2, c1 in zip(s1.lower(), s2.lower(), s1): + if u1 == wildcard or u2 == wildcard: + nCount += 1 + elif u1 == u2: + if u1 == c1: + matches += 1 + else: + repMatches += 1 + else: + misMatches += 1 + else: + for u1, u2 in zip(s1.upper(), s2.upper()): + if u1 == wildcard or u2 == wildcard: + nCount += 1 + elif u1 == u2: + matches += 1 + else: + misMatches += 1 + tStart = tEnd + qStart = qEnd + tStart = tStarts[0] # start of alignment in target + qStart = qStarts[0] # start of alignment in query + tEnd = tStarts[-1] + blockSizes[-1] # end of alignment in target + qEnd = qStarts[-1] + blockSizes[-1] # end of alignment in query + if strand == "-": + qStart, qEnd = qSize - qEnd, qSize - qStart + blockCount = len(blockSizes) + blockSizes = ",".join(map(str, blockSizes)) + "," + qStarts = ",".join(map(str, qStarts)) + "," + tStarts = ",".join(map(str, tStarts)) + "," + words = [ + str(matches), + str(misMatches), + str(repMatches), + str(nCount), + str(qNumInsert), + str(qBaseInsert), + str(tNumInsert), + str(tBaseInsert), + strand, + qName, + str(qSize), + str(qStart), + str(qEnd), + tName, + str(tSize), + str(tStart), + str(tEnd), + str(blockCount), + blockSizes, + qStarts, + tStarts, + ] + line = "\t".join(words) + "\n" + return line + + def _format_sam(self): + query = self.query + target = self.target + try: + qName = query.id + except AttributeError: + qName = "query" + else: + query = query.seq + try: + rName = target.id + except AttributeError: + rName = "target" + else: + target = target.seq + n1 = len(target) + n2 = len(query) + pos = None + qSize = n2 + tSize = n1 + cigar = [] + path = self.path + if path[0][1] < path[-1][1]: # mapped to forward strand + flag = 0 + seq = query + else: # mapped to reverse strand + flag = 16 + seq = reverse_complement(query) + path = tuple((c1, n2 - c2) for (c1, c2) in path) + try: + seq = bytes(seq) + except TypeError: # string + pass + else: + seq = str(seq, "ASCII") + tStart, qStart = path[0] + for tEnd, qEnd in path[1:]: + tCount = tEnd - tStart + qCount = qEnd - qStart + if tCount == 0: + length = qCount + if pos is None or tEnd == tSize: + operation = "S" + else: + operation = "I" + qStart = qEnd + elif qCount == 0: + if tStart > 0 and tEnd < tSize: + length = tCount + operation = "D" + else: + operation = None + tStart = tEnd + else: + assert tCount == qCount + if pos is None: + pos = tStart + tStart = tEnd + qStart = qEnd + operation = "M" + length = tCount + if operation is not None: + cigar.append(str(length) + operation) + mapQ = 255 # not available + rNext = "*" + pNext = 0 + tLen = 0 + qual = "*" + cigar = "".join(cigar) + tag = "AS:i:%d" % int(round(self.score)) + words = [ + qName, + str(flag), + rName, + str(pos + 1), # 1-based coordinates + str(mapQ), + cigar, + rNext, + str(pNext), + str(tLen), + seq, + qual, + tag, + ] + line = "\t".join(words) + "\n" + return line + + def __str__(self): + return 
self.format() + + def __len__(self): + """Return the number of sequences in the alignment, which is always 2.""" + return 2 + + @property + def shape(self): + """Return the shape of the alignment as a tuple of two integer values. + + The first integer value is the number of sequences in the alignment as + returned by len(alignment), which is always 2 for pairwise alignments. + + The second integer value is the number of columns in the alignment when + it is printed, and is equal to the sum of the number of matches, number + of mismatches, and the total length of gaps in the target and query. + Sequence sections beyond the aligned segment are not included in the + number of columns. + + For example, + + >>> from Bio import Align + >>> aligner = Align.PairwiseAligner() + >>> aligner.mode = "global" + >>> alignments = aligner.align("GACCTG", "CGATCG") + >>> alignment = alignments[0] + >>> print(alignment) + -GACCT-G + -||--|-| + CGA--TCG + + >>> len(alignment) + 2 + >>> alignment.shape + (2, 8) + >>> aligner.mode = "local" + >>> alignments = aligner.align("GACCTG", "CGATCG") + >>> alignment = alignments[0] + >>> print(alignment) + GACCT-G + ||--|-| + CGA--TCG + + >>> len(alignment) + 2 + >>> alignment.shape + (2, 7) + """ + path = self.path + if path[0][1] > path[-1][1]: # mapped to reverse strand + n2 = len(self.query) + path = tuple((c1, n2 - c2) for (c1, c2) in path) + start = path[0] + n = len(start) + m = 0 + for end in path[1:]: + m += max(e - s for s, e in zip(start, end)) + start = end + return (n, m) + + @property + def aligned(self): + """Return the indices of subsequences aligned to each other. + + This property returns the start and end indices of subsequences + in the target and query sequence that were aligned to each other. + If the alignment between target (t) and query (q) consists of N + chunks, you get two tuples of length N: + + (((t_start1, t_end1), (t_start2, t_end2), ..., (t_startN, t_endN)), + ((q_start1, q_end1), (q_start2, q_end2), ..., (q_startN, q_endN))) + + For example, + + >>> from Bio import Align + >>> aligner = Align.PairwiseAligner() + >>> alignments = aligner.align("GAACT", "GAT") + >>> alignment = alignments[0] + >>> print(alignment) + GAACT + ||--| + GA--T + + >>> alignment.aligned + (((0, 2), (4, 5)), ((0, 2), (2, 3))) + >>> alignment = alignments[1] + >>> print(alignment) + GAACT + |-|-| + G-A-T + + >>> alignment.aligned + (((0, 1), (2, 3), (4, 5)), ((0, 1), (1, 2), (2, 3))) + + Note that different alignments may have the same subsequences + aligned to each other. In particular, this may occur if alignments + differ from each other in terms of their gap placement only: + + >>> aligner.mismatch_score = -10 + >>> alignments = aligner.align("AAACAAA", "AAAGAAA") + >>> len(alignments) + 2 + >>> print(alignments[0]) + AAAC-AAA + |||--||| + AAA-GAAA + + >>> alignments[0].aligned + (((0, 3), (4, 7)), ((0, 3), (4, 7))) + >>> print(alignments[1]) + AAA-CAAA + |||--||| + AAAG-AAA + + >>> alignments[1].aligned + (((0, 3), (4, 7)), ((0, 3), (4, 7))) + + The property can be used to identify alignments that are identical + to each other in terms of their aligned sequences. 
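Because `aligned` returns plain nested tuples, the segment pairs are hashable and can be used directly as set keys. A short sketch (reusing the aligner settings from the doctest above) of collapsing alignments that differ only in their gap placement:

```python
# Sketch: alignments that differ only in gap placement share identical
# aligned-segment tuples, so a set over .aligned collapses them to one entry.
from Bio import Align

aligner = Align.PairwiseAligner()
aligner.mismatch_score = -10
alignments = aligner.align("AAACAAA", "AAAGAAA")
distinct = {alignment.aligned for alignment in alignments}
print(len(alignments), "alignments,", len(distinct), "distinct")
# prints: 2 alignments, 1 distinct
```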
+ """ + segments1 = [] + segments2 = [] + path = self.path + if path[0][1] < path[-1][1]: # mapped to forward strand + i1, i2 = path[0] + for node in path[1:]: + j1, j2 = node + if j1 > i1 and j2 > i2: + segment1 = (i1, j1) + segment2 = (i2, j2) + segments1.append(segment1) + segments2.append(segment2) + i1, i2 = j1, j2 + else: # mapped to reverse strand + n2 = len(self.query) + i1, i2 = path[0] + i2 = n2 - i2 + for node in path[1:]: + j1, j2 = node + j2 = n2 - j2 + if j1 > i1 and j2 > i2: + segment1 = (i1, j1) + segment2 = (n2 - i2, n2 - j2) + segments1.append(segment1) + segments2.append(segment2) + i1, i2 = j1, j2 + return tuple(segments1), tuple(segments2) + + def sort(self, key=None, reverse=False): + """Sort the sequences of the alignment in place. + + By default, this sorts the sequences alphabetically using their id + attribute if available, or by their sequence contents otherwise. + For example, + + >>> from Bio.Align import PairwiseAligner + >>> aligner = PairwiseAligner() + >>> aligner.gap_score = -1 + >>> alignments = aligner.align("AATAA", "AAGAA") + >>> len(alignments) + 1 + >>> alignment = alignments[0] + >>> print(alignment) + AATAA + ||.|| + AAGAA + + >>> alignment.sort() + >>> print(alignment) + AAGAA + ||.|| + AATAA + + + Alternatively, a key function can be supplied that maps each sequence + to a sort value. For example, you could sort on the GC content of each + sequence. + + >>> from Bio.SeqUtils import GC + >>> alignment.sort(key=GC) + >>> print(alignment) + AATAA + ||.|| + AAGAA + + + You can reverse the sort order by passing `reverse=True`: + + >>> alignment.sort(key=GC, reverse=True) + >>> print(alignment) + AAGAA + ||.|| + AATAA + + + The sequences are now sorted by decreasing GC content value. + """ + path = self.path + sequences = self.target, self.query + if key is None: + try: + values = [sequence.id for sequence in sequences] + except AttributeError: + values = sequences + else: + values = [key(sequence) for sequence in sequences] + indices = sorted(range(len(sequences)), key=values.__getitem__, reverse=reverse) + sequences = [sequences[index] for index in indices] + self.target, self.query = sequences + path = tuple(tuple(row[index] for index in indices) for row in path) + self.path = path + + def map(self, alignment): + r"""Map the alignment to self.target and return the resulting alignment. + + Here, self.query and alignment.target are the same sequence. 
+ + A typical example is where self is the pairwise alignment between a + chromosome and a transcript, the argument is the pairwise alignment + between the transcript and a sequence (e.g., as obtained by RNA-seq), + and we want to find the alignment of the sequence to the chromosome: + + >>> from Bio import Align + >>> aligner = Align.PairwiseAligner() + >>> aligner.mode = 'local' + >>> aligner.open_gap_score = -1 + >>> aligner.extend_gap_score = 0 + >>> chromosome = "AAAAAAAACCCCCCCAAAAAAAAAAAGGGGGGAAAAAAAA" + >>> transcript = "CCCCCCCGGGGGG" + >>> alignments1 = aligner.align(chromosome, transcript) + >>> len(alignments1) + 1 + >>> alignment1 = alignments1[0] + >>> print(alignment1) + AAAAAAAACCCCCCCAAAAAAAAAAAGGGGGGAAAAAAAA + |||||||-----------|||||| + CCCCCCC-----------GGGGGG + + >>> sequence = "CCCCGGGG" + >>> alignments2 = aligner.align(transcript, sequence) + >>> len(alignments2) + 1 + >>> alignment2 = alignments2[0] + >>> print(alignment2) + CCCCCCCGGGGGG + |||||||| + CCCCGGGG + + >>> alignment = alignment1.map(alignment2) + >>> print(alignment) + AAAAAAAACCCCCCCAAAAAAAAAAAGGGGGGAAAAAAAA + ||||-----------|||| + CCCC-----------GGGG + + >>> format(alignment, "psl") + '8\t0\t0\t0\t0\t0\t1\t11\t+\tquery\t8\t0\t8\ttarget\t40\t11\t30\t2\t4,4,\t0,4,\t11,26,\n' + + Mapping the alignment does not depend on the sequence contents. If we + delete the sequence contents, the same alignment is found in PSL format + (though we obviously lose the ability to print the sequence alignment): + + >>> alignment1.target = Seq(None, len(alignment1.target)) + >>> alignment1.query = Seq(None, len(alignment1.query)) + >>> alignment2.target = Seq(None, len(alignment2.target)) + >>> alignment2.query = Seq(None, len(alignment2.query)) + >>> alignment = alignment1.map(alignment2) + >>> format(alignment, "psl") + '8\t0\t0\t0\t0\t0\t1\t11\t+\tquery\t8\t0\t8\ttarget\t40\t11\t30\t2\t4,4,\t0,4,\t11,26,\n' + """ + from numpy import array + + alignment1, alignment2 = self, alignment + if len(alignment1.query) != len(alignment2.target): + raise ValueError( + "length of alignment1 query sequence (%d) != length of alignment2 target sequence (%d)" + % (len(alignment1.query), len(alignment2.target)) + ) + target = alignment1.target + query = alignment2.query + path1 = alignment1.path + path2 = alignment2.path + n1 = len(alignment1.query) + n2 = len(alignment2.query) + if path1[0][1] < path1[-1][1]: # mapped to forward strand + strand1 = "+" + else: # mapped to reverse strand + strand1 = "-" + if path2[0][1] < path2[-1][1]: # mapped to forward strand + strand2 = "+" + else: # mapped to reverse strand + strand2 = "-" + path1 = array(path1) + path2 = array(path2) + if strand1 == "+": + if strand2 == "-": # mapped to reverse strand + path2[:, 1] = n2 - path2[:, 1] + else: # mapped to reverse strand + path1[:, 1] = n1 - path1[:, 1] + path2[:, 0] = n1 - path2[::-1, 0] + if strand2 == "+": + path2[:, 1] = n2 - path2[::-1, 1] + else: # mapped to reverse strand + path2[:, 1] = path2[::-1, 1] + path = [] + tEnd, qEnd = sys.maxsize, sys.maxsize + path1 = iter(path1) + tStart1, qStart1 = sys.maxsize, sys.maxsize + for tEnd1, qEnd1 in path1: + if tStart1 < tEnd1 and qStart1 < qEnd1: + break + tStart1, qStart1 = tEnd1, qEnd1 + tStart2, qStart2 = sys.maxsize, sys.maxsize + for tEnd2, qEnd2 in path2: + while qStart2 < qEnd2 and tStart2 < tEnd2: + while True: + if tStart2 < qStart1: + if tEnd2 < qStart1: + size = tEnd2 - tStart2 + else: + size = qStart1 - tStart2 + break + elif tStart2 < qEnd1: + offset = tStart2 - qStart1 + if tEnd2 > qEnd1: 
+ size = qEnd1 - tStart2 + else: + size = tEnd2 - tStart2 + qStart = qStart2 + tStart = tStart1 + offset + if tStart > tEnd and qStart > qEnd: + # adding a gap both in target and in query; + # add gap to target first: + path.append([tStart, qEnd]) + qEnd = qStart2 + size + tEnd = tStart + size + path.append([tStart, qStart]) + path.append([tEnd, qEnd]) + break + tStart1, qStart1 = sys.maxsize, sys.maxsize + for tEnd1, qEnd1 in path1: + if tStart1 < tEnd1 and qStart1 < qEnd1: + break + tStart1, qStart1 = tEnd1, qEnd1 + else: + size = qEnd2 - qStart2 + break + qStart2 += size + tStart2 += size + tStart2, qStart2 = tEnd2, qEnd2 + if strand1 != strand2: + path = tuple((c1, n2 - c2) for (c1, c2) in path) + alignment = PairwiseAlignment(target, query, path, None) + return alignment + + @property + def substitutions(self): + """Return an Array with the number of substitutions of letters in the alignment. + + As an example, consider a sequence alignment of two RNA sequences: + + >>> from Bio.Align import PairwiseAligner + >>> target = "ATACTTACCTGGCAGGGGAGATACCATGATCACGAAGGTGGTTTTCCCAGGGCGAGGCTTATCCATTGCACTCCGGATGTGCTGACCCCTGCGATTTCCCCAAATGTGGGAAACTCGACTGCATAATTTGTGGTAGTGGGGGACTGCGTTCGCGCTTTCCCCTG" # human spliceosomal small nuclear RNA U1 + >>> query = "ATACTTACCTGACAGGGGAGGCACCATGATCACACAGGTGGTCCTCCCAGGGCGAGGCTCTTCCATTGCACTGCGGGAGGGTTGACCCCTGCGATTTCCCCAAATGTGGGAAACTCGACTGTATAATTTGTGGTAGTGGGGGACTGCGTTCGCGCTATCCCCCG" # sea lamprey spliceosomal small RNA U1 + >>> aligner = PairwiseAligner() + >>> aligner.gap_score = -10 + >>> alignments = aligner.align(target, query) + >>> len(alignments) + 1 + >>> alignment = alignments[0] + >>> print(alignment) + ATACTTACCTGGCAGGGGAGATACCATGATCACGAAGGTGGTTTTCCCAGGGCGAGGCTTATCCATTGCACTCCGGATGTGCTGACCCCTGCGATTTCCCCAAATGTGGGAAACTCGACTGCATAATTTGTGGTAGTGGGGGACTGCGTTCGCGCTTTCCCCTG + |||||||||||.||||||||..|||||||||||..|||||||..|||||||||||||||..|||||||||||.|||..|.|.|||||||||||||||||||||||||||||||||||||||.||||||||||||||||||||||||||||||||||.|||||.| + ATACTTACCTGACAGGGGAGGCACCATGATCACACAGGTGGTCCTCCCAGGGCGAGGCTCTTCCATTGCACTGCGGGAGGGTTGACCCCTGCGATTTCCCCAAATGTGGGAAACTCGACTGTATAATTTGTGGTAGTGGGGGACTGCGTTCGCGCTATCCCCCG + + >>> m = alignment.substitutions + >>> print(m) + A C G T + A 28.0 1.0 2.0 1.0 + C 0.0 39.0 1.0 2.0 + G 2.0 0.0 45.0 0.0 + T 2.0 5.0 1.0 35.0 + + + Note that the matrix is not symmetric: rows correspond to the target + sequence, and columns to the query sequence. For example, the number + of T's in the target sequence that are aligned to a C in the query + sequence is + + >>> m['T', 'C'] + 5.0 + + and the number of C's in the target sequence that are aligned to a T in + the query sequence is + + >>> m['C', 'T'] + 2.0 + + For some applications (for example, to define a scoring matrix from + the substitution matrix), a symmetric matrix may be preferred, which + can be calculated as follows: + + >>> m += m.transpose() + >>> m /= 2.0 + >>> print(m) + A C G T + A 28.0 0.5 2.0 1.5 + C 0.5 39.0 0.5 3.5 + G 2.0 0.5 45.0 0.5 + T 1.5 3.5 0.5 35.0 + + + The matrix is now symmetric, with counts divided equally on both sides + of the diagonal: + + >>> m['C', 'T'] + 3.5 + >>> m['T', 'C'] + 3.5 + + The total number of substitutions between T's and C's in the alignment + is 3.5 + 3.5 = 7.
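One practical use of the substitutions matrix is a quick identity summary: identities sit on the diagonal, and the total sum counts every aligned (gap-free) residue pair. A hedged sketch follows; the short sequences are made up for illustration, and since `Array` is a `numpy.ndarray` subclass it can be handed to numpy as-is:

```python
# Sketch: percent identity over aligned columns from the substitutions
# matrix. np.asarray strips the Array wrapper so plain numpy reductions apply.
import numpy as np
from Bio.Align import PairwiseAligner

aligner = PairwiseAligner()
aligner.gap_score = -10
alignment = aligner.align("ATACTTACC", "ATACTGACC")[0]
counts = np.asarray(alignment.substitutions)
identity = counts.trace() / counts.sum()  # diagonal entries = identities
print("%.1f%% identity" % (100.0 * identity))
# prints: 88.9% identity (8 of the 9 aligned columns match)
```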
+ """ + target = self.target + try: + target = target.seq + except AttributeError: + pass + query = self.query + try: + query = query.seq + except AttributeError: + pass + sequences = (str(target), str(query)) + letters = set.union(*[set(sequence) for sequence in sequences]) + letters = "".join(sorted(letters)) + m = substitution_matrices.Array(letters, dims=2) + n = len(sequences) + for i1 in range(n): + path1 = [p[i1] for p in self.path] + sequence1 = sequences[i1] + for i2 in range(i1 + 1, n): + path2 = [p[i2] for p in self.path] + sequence2 = sequences[i2] + start1, start2 = sys.maxsize, sys.maxsize + for end1, end2 in zip(path1, path2): + if start1 < end1 and start2 < end2: # aligned + segment1 = sequence1[start1:end1] + segment2 = sequence2[start2:end2] + for c1, c2 in zip(segment1, segment2): + m[c1, c2] += 1.0 + start1, start2 = end1, end2 + return m + + +class PairwiseAlignments: + """Implements an iterator over pairwise alignments returned by the aligner. + + This class also supports indexing, which is fast for increasing indices, + but may be slow for random access of a large number of alignments. + + Note that pairwise aligners can return an astronomical number of alignments, + even for relatively short sequences, if they align poorly to each other. We + therefore recommend to first check the number of alignments, accessible as + len(alignments), which can be calculated quickly even if the number of + alignments is very large. + """ + + def __init__(self, seqA, seqB, score, paths): + """Initialize a new PairwiseAlignments object. + + Arguments: + - seqA - The first sequence, as a plain string, without gaps. + - seqB - The second sequence, as a plain string, without gaps. + - score - The alignment score. + - paths - An iterator over the paths in the traceback matrix; + each path defines one alignment. + + You would normally obtain an PairwiseAlignments object by calling + aligner.align(seqA, seqB), where aligner is a PairwiseAligner object. + """ + self.seqA = seqA + self.seqB = seqB + self.score = score + self.paths = paths + self.index = -1 + + def __len__(self): + return len(self.paths) + + def __getitem__(self, index): + if index == self.index: + return self.alignment + if index < self.index: + self.paths.reset() + self.index = -1 + while self.index < index: + try: + alignment = next(self) + except StopIteration: + raise IndexError("index out of range") from None + return alignment + + def __iter__(self): + self.paths.reset() + self.index = -1 + return self + + def __next__(self): + path = next(self.paths) + self.index += 1 + alignment = PairwiseAlignment(self.seqA, self.seqB, path, self.score) + self.alignment = alignment + return alignment + + +class PairwiseAligner(_aligners.PairwiseAligner): + """Performs pairwise sequence alignment using dynamic programming. + + This provides functions to get global and local alignments between two + sequences. A global alignment finds the best concordance between all + characters in two sequences. A local alignment finds just the + subsequences that align the best. + + To perform a pairwise sequence alignment, first create a PairwiseAligner + object. This object stores the match and mismatch scores, as well as the + gap scores. Typically, match scores are positive, while mismatch scores + and gap scores are negative or zero. By default, the match score is 1, + and the mismatch and gap scores are zero. 
Based on the values of the gap + scores, a PairwiseAligner object automatically chooses the appropriate + alignment algorithm (the Needleman-Wunsch, Smith-Waterman, Gotoh, or + Waterman-Smith-Beyer global or local alignment algorithm). + + Calling the "score" method on the aligner with two sequences as arguments + will calculate the alignment score between the two sequences. + Calling the "align" method on the aligner with two sequences as arguments + will return a generator yielding the alignments between the two + sequences. + + Some examples: + + >>> from Bio import Align + >>> aligner = Align.PairwiseAligner() + >>> alignments = aligner.align("TACCG", "ACG") + >>> for alignment in sorted(alignments): + ... print("Score = %.1f:" % alignment.score) + ... print(alignment) + ... + Score = 3.0: + TACCG + -|-|| + -A-CG + + Score = 3.0: + TACCG + -||-| + -AC-G + + + Specify the aligner mode as local to generate local alignments: + + >>> aligner.mode = 'local' + >>> alignments = aligner.align("TACCG", "ACG") + >>> for alignment in sorted(alignments): + ... print("Score = %.1f:" % alignment.score) + ... print(alignment) + ... + Score = 3.0: + TACCG + |-|| + A-CG + + Score = 3.0: + TACCG + ||-| + AC-G + + + Do a global alignment. Identical characters are given 2 points, + 1 point is deducted for each non-identical character. + + >>> aligner.mode = 'global' + >>> aligner.match_score = 2 + >>> aligner.mismatch_score = -1 + >>> for alignment in aligner.align("TACCG", "ACG"): + ... print("Score = %.1f:" % alignment.score) + ... print(alignment) + ... + Score = 6.0: + TACCG + -||-| + -AC-G + + Score = 6.0: + TACCG + -|-|| + -A-CG + + + Same as above, except now 0.5 points are deducted when opening a + gap, and 0.1 points are deducted when extending it. + + >>> aligner.open_gap_score = -0.5 + >>> aligner.extend_gap_score = -0.1 + >>> aligner.target_end_gap_score = 0.0 + >>> aligner.query_end_gap_score = 0.0 + >>> for alignment in aligner.align("TACCG", "ACG"): + ... print("Score = %.1f:" % alignment.score) + ... print(alignment) + ... + Score = 5.5: + TACCG + -|-|| + -A-CG + + Score = 5.5: + TACCG + -||-| + -AC-G + + + The alignment function can also use known matrices already included in + Biopython: + + >>> from Bio.Align import substitution_matrices + >>> aligner = Align.PairwiseAligner() + >>> aligner.substitution_matrix = substitution_matrices.load("BLOSUM62") + >>> alignments = aligner.align("KEVLA", "EVL") + >>> alignments = list(alignments) + >>> print("Number of alignments: %d" % len(alignments)) + Number of alignments: 1 + >>> alignment = alignments[0] + >>> print("Score = %.1f" % alignment.score) + Score = 13.0 + >>> print(alignment) + KEVLA + -|||- + -EVL- + + + You can also set the value of attributes directly during construction + of the PairwiseAligner object by providing them as keyword arguments: + + >>> aligner = Align.PairwiseAligner(mode='global', match_score=2, mismatch_score=-1) + >>> for alignment in aligner.align("TACCG", "ACG"): + ... print("Score = %.1f:" % alignment.score) + ... print(alignment) + ... + Score = 6.0: + TACCG + -||-| + -AC-G + + Score = 6.0: + TACCG + -|-|| + -A-CG + + + """ + + def __init__(self, **kwargs): + """Initialize a new PairwiseAligner with the keyword arguments as attributes. + + Loops over the keyword arguments and sets them as attributes on the object.
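Since the constructor only forwards keyword arguments to `setattr`, and `__setattr__` below rejects any name that is not a predefined aligner attribute, a misspelled score name raises immediately instead of being silently ignored. A small sketch:

```python
# Sketch: keyword arguments at construction are equivalent to setting the
# attributes afterwards; unknown attribute names raise AttributeError.
from Bio import Align

a1 = Align.PairwiseAligner(mode="global", match_score=2, mismatch_score=-1)
a2 = Align.PairwiseAligner()
a2.mode = "global"
a2.match_score = 2
a2.mismatch_score = -1
assert a1.match_score == a2.match_score == 2

try:
    Align.PairwiseAligner(matchscore=2)  # hypothetical typo of match_score
except AttributeError as err:
    print(err)  # 'PairwiseAligner' object has no attribute 'matchscore'
```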
+ """ + super().__init__() + for name, value in kwargs.items(): + setattr(self, name, value) + + def __setattr__(self, key, value): + if key not in dir(_aligners.PairwiseAligner): + # To prevent confusion, don't allow users to create new attributes. + # On CPython, __slots__ can be used for this, but currently + # __slots__ does not behave the same way on PyPy at least. + raise AttributeError("'PairwiseAligner' object has no attribute '%s'" % key) + _aligners.PairwiseAligner.__setattr__(self, key, value) + + def align(self, seqA, seqB, strand="+"): + """Return the alignments of two sequences using PairwiseAligner.""" + if isinstance(seqA, (Seq, MutableSeq)): + sA = bytes(seqA) + else: + sA = seqA + if strand == "+": + sB = seqB + else: # strand == "-": + sB = reverse_complement(seqB) + if isinstance(sB, (Seq, MutableSeq)): + sB = bytes(sB) + score, paths = _aligners.PairwiseAligner.align(self, sA, sB, strand) + alignments = PairwiseAlignments(seqA, seqB, score, paths) + return alignments + + def score(self, seqA, seqB, strand="+"): + """Return the alignments score of two sequences using PairwiseAligner.""" + if isinstance(seqA, (Seq, MutableSeq)): + seqA = bytes(seqA) + if strand == "-": + seqB = reverse_complement(seqB) + if isinstance(seqB, (Seq, MutableSeq)): + seqB = bytes(seqB) + return _aligners.PairwiseAligner.score(self, seqA, seqB, strand) + + def __getstate__(self): + state = { + "wildcard": self.wildcard, + "target_internal_open_gap_score": self.target_internal_open_gap_score, + "target_internal_extend_gap_score": self.target_internal_extend_gap_score, + "target_left_open_gap_score": self.target_left_open_gap_score, + "target_left_extend_gap_score": self.target_left_extend_gap_score, + "target_right_open_gap_score": self.target_right_open_gap_score, + "target_right_extend_gap_score": self.target_right_extend_gap_score, + "query_internal_open_gap_score": self.query_internal_open_gap_score, + "query_internal_extend_gap_score": self.query_internal_extend_gap_score, + "query_left_open_gap_score": self.query_left_open_gap_score, + "query_left_extend_gap_score": self.query_left_extend_gap_score, + "query_right_open_gap_score": self.query_right_open_gap_score, + "query_right_extend_gap_score": self.query_right_extend_gap_score, + "mode": self.mode, + } + if self.substitution_matrix is None: + state["match_score"] = self.match_score + state["mismatch_score"] = self.mismatch_score + else: + state["substitution_matrix"] = self.substitution_matrix + return state + + def __setstate__(self, state): + self.wildcard = state["wildcard"] + self.target_internal_open_gap_score = state["target_internal_open_gap_score"] + self.target_internal_extend_gap_score = state[ + "target_internal_extend_gap_score" + ] + self.target_left_open_gap_score = state["target_left_open_gap_score"] + self.target_left_extend_gap_score = state["target_left_extend_gap_score"] + self.target_right_open_gap_score = state["target_right_open_gap_score"] + self.target_right_extend_gap_score = state["target_right_extend_gap_score"] + self.query_internal_open_gap_score = state["query_internal_open_gap_score"] + self.query_internal_extend_gap_score = state["query_internal_extend_gap_score"] + self.query_left_open_gap_score = state["query_left_open_gap_score"] + self.query_left_extend_gap_score = state["query_left_extend_gap_score"] + self.query_right_open_gap_score = state["query_right_open_gap_score"] + self.query_right_extend_gap_score = state["query_right_extend_gap_score"] + self.mode = state["mode"] + substitution_matrix = 
state.get("substitution_matrix") + if substitution_matrix is None: + self.match_score = state["match_score"] + self.mismatch_score = state["mismatch_score"] + else: + self.substitution_matrix = substitution_matrix + + +if __name__ == "__main__": + from Bio._utils import run_doctest + + run_doctest() diff --git a/code/lib/Bio/Align/__pycache__/AlignInfo.cpython-37.pyc b/code/lib/Bio/Align/__pycache__/AlignInfo.cpython-37.pyc new file mode 100644 index 0000000..7955c2f Binary files /dev/null and b/code/lib/Bio/Align/__pycache__/AlignInfo.cpython-37.pyc differ diff --git a/code/lib/Bio/Align/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/Align/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..54c2237 Binary files /dev/null and b/code/lib/Bio/Align/__pycache__/__init__.cpython-37.pyc differ diff --git a/code/lib/Bio/Align/_aligners.c b/code/lib/Bio/Align/_aligners.c new file mode 100644 index 0000000..e78f252 --- /dev/null +++ b/code/lib/Bio/Align/_aligners.c @@ -0,0 +1,6988 @@ +/* Copyright 2018-2019 by Michiel de Hoon. All rights reserved. + * This file is part of the Biopython distribution and governed by your + * choice of the "Biopython License Agreement" or the "BSD 3-Clause License". + * Please see the LICENSE file that should have been included as part of this + * package. + */ + + + +#define PY_SSIZE_T_CLEAN +#include "Python.h" +#include "float.h" + + +#define HORIZONTAL 0x1 +#define VERTICAL 0x2 +#define DIAGONAL 0x4 +#define STARTPOINT 0x8 +#define ENDPOINT 0x10 +#define M_MATRIX 0x1 +#define Ix_MATRIX 0x2 +#define Iy_MATRIX 0x4 +#define DONE 0x3 +#define NONE 0x7 + +#define OVERFLOW_ERROR -1 +#define MEMORY_ERROR -2 + +#define MISSING_LETTER -1 + +#define SAFE_ADD(t, s) \ +{ if (s != OVERFLOW_ERROR) { \ + term = t; \ + if (term > PY_SSIZE_T_MAX - s) s = OVERFLOW_ERROR; \ + else s += term; \ + } \ +} + + +typedef enum {NeedlemanWunschSmithWaterman, + Gotoh, + WatermanSmithBeyer, + Unknown} Algorithm; + +typedef enum {Global, Local} Mode; + +typedef struct { + unsigned char trace : 5; + unsigned char path : 3; +} Trace; + +typedef struct { + unsigned char Ix : 4; + unsigned char Iy : 4; +} TraceGapsGotoh; + +typedef struct { + int* MIx; + int* IyIx; + int* MIy; + int* IxIy; +} TraceGapsWatermanSmithBeyer; + +typedef struct { + PyObject_HEAD + Trace** M; + union { TraceGapsGotoh** gotoh; + TraceGapsWatermanSmithBeyer** waterman_smith_beyer; } gaps; + int nA; + int nB; + int iA; + int iB; + Mode mode; + Algorithm algorithm; + Py_ssize_t length; + unsigned char strand; +} PathGenerator; + +static PyObject* +PathGenerator_create_path(PathGenerator* self, int i, int j) { + PyObject* tuple; + PyObject* row; + PyObject* value; + int path; + const int ii = i; + const int jj = j; + int n = 1; + int direction = 0; + Trace** M = self->M; + const unsigned char strand = self->strand; + + while (1) { + path = M[i][j].path; + if (!path) break; + if (path != direction) { + n++; + direction = path; + } + switch (path) { + case HORIZONTAL: j++; break; + case VERTICAL: i++; break; + case DIAGONAL: i++; j++; break; + } + } + + i = ii; + j = jj; + direction = 0; + tuple = PyTuple_New(n); + if (!tuple) return NULL; + + n = 0; + switch (strand) { + case '+': + while (1) { + path = M[i][j].path; + if (path != direction) { + row = PyTuple_New(2); + if (!row) break; + value = PyLong_FromLong(i); + if (!value) { + Py_DECREF(row); /* all references were stolen */ + break; + } + PyTuple_SET_ITEM(row, 0, value); + value = PyLong_FromLong(j); + if (!value) { + Py_DECREF(row); /* all 
references were stolen */ + break; + } + PyTuple_SET_ITEM(row, 1, value); + PyTuple_SET_ITEM(tuple, n, row); + n++; + direction = path; + } + switch (path) { + case HORIZONTAL: j++; break; + case VERTICAL: i++; break; + case DIAGONAL: i++; j++; break; + default: return tuple; + } + } + break; + case '-': { + const int nB = self->nB; + while (1) { + path = M[i][j].path; + if (path != direction) { + row = PyTuple_New(2); + if (!row) break; + value = PyLong_FromLong(i); + if (!value) { + Py_DECREF(row); /* all references were stolen */ + break; + } + PyTuple_SET_ITEM(row, 0, value); + value = PyLong_FromLong(nB-j); + if (!value) { + Py_DECREF(row); /* all references were stolen */ + break; + } + PyTuple_SET_ITEM(row, 1, value); + PyTuple_SET_ITEM(tuple, n, row); + n++; + direction = path; + } + switch (path) { + case HORIZONTAL: j++; break; + case VERTICAL: i++; break; + case DIAGONAL: i++; j++; break; + default: return tuple; + } + } + break; + } + } + Py_DECREF(tuple); /* all references were stolen */ + return PyErr_NoMemory(); +} + +static Py_ssize_t +PathGenerator_needlemanwunsch_length(PathGenerator* self) +{ + int i; + int j; + int trace; + const int nA = self->nA; + const int nB = self->nB; + Trace** M = self->M; + Py_ssize_t term; + Py_ssize_t count = MEMORY_ERROR; + Py_ssize_t temp; + Py_ssize_t* counts; + counts = PyMem_Malloc((nB+1)*sizeof(Py_ssize_t)); + if (!counts) goto exit; + counts[0] = 1; + for (j = 1; j <= nB; j++) { + trace = M[0][j].trace; + count = 0; + if (trace & HORIZONTAL) SAFE_ADD(counts[j-1], count); + counts[j] = count; + } + for (i = 1; i <= nA; i++) { + trace = M[i][0].trace; + count = 0; + if (trace & VERTICAL) SAFE_ADD(counts[0], count); + temp = counts[0]; + counts[0] = count; + for (j = 1; j <= nB; j++) { + trace = M[i][j].trace; + count = 0; + if (trace & HORIZONTAL) SAFE_ADD(counts[j-1], count); + if (trace & VERTICAL) SAFE_ADD(counts[j], count); + if (trace & DIAGONAL) SAFE_ADD(temp, count); + temp = counts[j]; + counts[j] = count; + } + } + PyMem_Free(counts); +exit: + return count; +} + +static Py_ssize_t +PathGenerator_smithwaterman_length(PathGenerator* self) +{ + int i; + int j; + int trace; + const int nA = self->nA; + const int nB = self->nB; + Trace** M = self->M; + Py_ssize_t term; + Py_ssize_t count = MEMORY_ERROR; + Py_ssize_t total = 0; + Py_ssize_t temp; + Py_ssize_t* counts; + counts = PyMem_Malloc((nB+1)*sizeof(Py_ssize_t)); + if (!counts) goto exit; + counts[0] = 1; + for (j = 1; j <= nB; j++) counts[j] = 1; + for (i = 1; i <= nA; i++) { + temp = counts[0]; + counts[0] = 1; + for (j = 1; j <= nB; j++) { + trace = M[i][j].trace; + count = 0; + if (trace & DIAGONAL) SAFE_ADD(temp, count); + if (M[i][j].trace & ENDPOINT) SAFE_ADD(count, total); + if (trace & HORIZONTAL) SAFE_ADD(counts[j-1], count); + if (trace & VERTICAL) SAFE_ADD(counts[j], count); + temp = counts[j]; + if (count == 0 && (trace & STARTPOINT)) count = 1; + counts[j] = count; + } + } + count = total; + PyMem_Free(counts); +exit: + return count; +} + +static Py_ssize_t +PathGenerator_gotoh_global_length(PathGenerator* self) +{ + int i; + int j; + int trace; + const int nA = self->nA; + const int nB = self->nB; + Trace** M = self->M; + TraceGapsGotoh** gaps = self->gaps.gotoh; + Py_ssize_t count = MEMORY_ERROR; + Py_ssize_t term; + Py_ssize_t M_temp; + Py_ssize_t Ix_temp; + Py_ssize_t Iy_temp; + Py_ssize_t* M_counts = NULL; + Py_ssize_t* Ix_counts = NULL; + Py_ssize_t* Iy_counts = NULL; + M_counts = PyMem_Malloc((nB+1)*sizeof(Py_ssize_t)); + if (!M_counts) goto exit; + 
Ix_counts = PyMem_Malloc((nB+1)*sizeof(Py_ssize_t)); + if (!Ix_counts) goto exit; + Iy_counts = PyMem_Malloc((nB+1)*sizeof(Py_ssize_t)); + if (!Iy_counts) goto exit; + M_counts[0] = 1; + Ix_counts[0] = 0; + Iy_counts[0] = 0; + for (j = 1; j <= nB; j++) { + M_counts[j] = 0; + Ix_counts[j] = 0; + Iy_counts[j] = 1; + } + for (i = 1; i <= nA; i++) { + M_temp = M_counts[0]; + M_counts[0] = 0; + Ix_temp = Ix_counts[0]; + Ix_counts[0] = 1; + Iy_temp = Iy_counts[0]; + Iy_counts[0] = 0; + for (j = 1; j <= nB; j++) { + count = 0; + trace = M[i][j].trace; + if (trace & M_MATRIX) SAFE_ADD(M_temp, count); + if (trace & Ix_MATRIX) SAFE_ADD(Ix_temp, count); + if (trace & Iy_MATRIX) SAFE_ADD(Iy_temp, count); + M_temp = M_counts[j]; + M_counts[j] = count; + count = 0; + trace = gaps[i][j].Ix; + if (trace & M_MATRIX) SAFE_ADD(M_temp, count); + if (trace & Ix_MATRIX) SAFE_ADD(Ix_counts[j], count); + if (trace & Iy_MATRIX) SAFE_ADD(Iy_counts[j], count); + Ix_temp = Ix_counts[j]; + Ix_counts[j] = count; + count = 0; + trace = gaps[i][j].Iy; + if (trace & M_MATRIX) SAFE_ADD(M_counts[j-1], count); + if (trace & Ix_MATRIX) SAFE_ADD(Ix_counts[j-1], count); + if (trace & Iy_MATRIX) SAFE_ADD(Iy_counts[j-1], count); + Iy_temp = Iy_counts[j]; + Iy_counts[j] = count; + } + } + count = 0; + if (M[nA][nB].trace) SAFE_ADD(M_counts[nB], count); + if (gaps[nA][nB].Ix) SAFE_ADD(Ix_counts[nB], count); + if (gaps[nA][nB].Iy) SAFE_ADD(Iy_counts[nB], count); +exit: + if (M_counts) PyMem_Free(M_counts); + if (Ix_counts) PyMem_Free(Ix_counts); + if (Iy_counts) PyMem_Free(Iy_counts); + return count; +} + +static Py_ssize_t +PathGenerator_gotoh_local_length(PathGenerator* self) +{ + int i; + int j; + int trace; + const int nA = self->nA; + const int nB = self->nB; + Trace** M = self->M; + TraceGapsGotoh** gaps = self->gaps.gotoh; + Py_ssize_t term; + Py_ssize_t count = MEMORY_ERROR; + Py_ssize_t total = 0; + Py_ssize_t M_temp; + Py_ssize_t Ix_temp; + Py_ssize_t Iy_temp; + Py_ssize_t* M_counts = NULL; + Py_ssize_t* Ix_counts = NULL; + Py_ssize_t* Iy_counts = NULL; + M_counts = PyMem_Malloc((nB+1)*sizeof(Py_ssize_t)); + if (!M_counts) goto exit; + Ix_counts = PyMem_Malloc((nB+1)*sizeof(Py_ssize_t)); + if (!Ix_counts) goto exit; + Iy_counts = PyMem_Malloc((nB+1)*sizeof(Py_ssize_t)); + if (!Iy_counts) goto exit; + M_counts[0] = 1; + Ix_counts[0] = 0; + Iy_counts[0] = 0; + for (j = 1; j <= nB; j++) { + M_counts[j] = 1; + Ix_counts[j] = 0; + Iy_counts[j] = 0; + } + for (i = 1; i <= nA; i++) { + M_temp = M_counts[0]; + M_counts[0] = 1; + Ix_temp = Ix_counts[0]; + Ix_counts[0] = 0; + Iy_temp = Iy_counts[0]; + Iy_counts[0] = 0; + for (j = 1; j <= nB; j++) { + count = 0; + trace = M[i][j].trace; + if (trace & M_MATRIX) SAFE_ADD(M_temp, count); + if (trace & Ix_MATRIX) SAFE_ADD(Ix_temp, count); + if (trace & Iy_MATRIX) SAFE_ADD(Iy_temp, count); + if (count == 0 && (trace & STARTPOINT)) count = 1; + M_temp = M_counts[j]; + M_counts[j] = count; + if (M[i][j].trace & ENDPOINT) SAFE_ADD(count, total); + count = 0; + trace = gaps[i][j].Ix; + if (trace & M_MATRIX) SAFE_ADD(M_temp, count); + if (trace & Ix_MATRIX) SAFE_ADD(Ix_counts[j], count); + if (trace & Iy_MATRIX) SAFE_ADD(Iy_counts[j], count); + Ix_temp = Ix_counts[j]; + Ix_counts[j] = count; + count = 0; + trace = gaps[i][j].Iy; + if (trace & M_MATRIX) SAFE_ADD(M_counts[j-1], count); + if (trace & Ix_MATRIX) SAFE_ADD(Ix_counts[j-1], count); + if (trace & Iy_MATRIX) SAFE_ADD(Iy_counts[j-1], count); + Iy_temp = Iy_counts[j]; + Iy_counts[j] = count; + } + } + count = total; +exit: + if (M_counts) 
PyMem_Free(M_counts); + if (Ix_counts) PyMem_Free(Ix_counts); + if (Iy_counts) PyMem_Free(Iy_counts); + return count; +} + +static Py_ssize_t +PathGenerator_waterman_smith_beyer_global_length(PathGenerator* self) +{ + int i; + int j; + int trace; + int* p; + int gap; + const int nA = self->nA; + const int nB = self->nB; + Trace** M = self->M; + TraceGapsWatermanSmithBeyer** gaps = self->gaps.waterman_smith_beyer; + Py_ssize_t count = MEMORY_ERROR; + Py_ssize_t term; + Py_ssize_t** M_count = NULL; + Py_ssize_t** Ix_count = NULL; + Py_ssize_t** Iy_count = NULL; + M_count = PyMem_Malloc((nA+1)*sizeof(Py_ssize_t*)); + if (!M_count) goto exit; + Ix_count = PyMem_Malloc((nA+1)*sizeof(Py_ssize_t*)); + if (!Ix_count) goto exit; + Iy_count = PyMem_Malloc((nA+1)*sizeof(Py_ssize_t*)); + if (!Iy_count) goto exit; + for (i = 0; i <= nA; i++) { + M_count[i] = PyMem_Malloc((nB+1)*sizeof(Py_ssize_t)); + if (!M_count[i]) goto exit; + Ix_count[i] = PyMem_Malloc((nB+1)*sizeof(Py_ssize_t)); + if (!Ix_count[i]) goto exit; + Iy_count[i] = PyMem_Malloc((nB+1)*sizeof(Py_ssize_t)); + if (!Iy_count[i]) goto exit; + } + for (i = 0; i <= nA; i++) { + for (j = 0; j <= nB; j++) { + count = 0; + trace = M[i][j].trace; + if (trace & M_MATRIX) SAFE_ADD(M_count[i-1][j-1], count); + if (trace & Ix_MATRIX) SAFE_ADD(Ix_count[i-1][j-1], count); + if (trace & Iy_MATRIX) SAFE_ADD(Iy_count[i-1][j-1], count); + if (count == 0) count = 1; /* happens at M[0][0] only */ + M_count[i][j] = count; + count = 0; + p = gaps[i][j].MIx; + if (p) { + while (1) { + gap = *p; + if (!gap) break; + SAFE_ADD(M_count[i-gap][j], count); + p++; + } + } + p = gaps[i][j].IyIx; + if (p) { + while (1) { + gap = *p; + if (!gap) break; + SAFE_ADD(Iy_count[i-gap][j], count); + p++; + } + } + Ix_count[i][j] = count; + count = 0; + p = gaps[i][j].MIy; + if (p) { + while (1) { + gap = *p; + if (!gap) break; + SAFE_ADD(M_count[i][j-gap], count); + p++; + } + } + p = gaps[i][j].IxIy; + if (p) { + while (1) { + gap = *p; + if (!gap) break; + SAFE_ADD(Ix_count[i][j-gap], count); + p++; + } + } + Iy_count[i][j] = count; + } + } + count = 0; + if (M[nA][nB].trace) + SAFE_ADD(M_count[nA][nB], count); + if (gaps[nA][nB].MIx[0] || gaps[nA][nB].IyIx[0]) + SAFE_ADD(Ix_count[nA][nB], count); + if (gaps[nA][nB].MIy[0] || gaps[nA][nB].IxIy[0]) + SAFE_ADD(Iy_count[nA][nB], count); +exit: + if (M_count) { + if (Ix_count) { + if (Iy_count) { + for (i = 0; i <= nA; i++) { + if (!M_count[i]) break; + PyMem_Free(M_count[i]); + if (!Ix_count[i]) break; + PyMem_Free(Ix_count[i]); + if (!Iy_count[i]) break; + PyMem_Free(Iy_count[i]); + } + PyMem_Free(Iy_count); + } + PyMem_Free(Ix_count); + } + PyMem_Free(M_count); + } + return count; +} + +static Py_ssize_t +PathGenerator_waterman_smith_beyer_local_length(PathGenerator* self) +{ + int i; + int j; + int trace; + int* p; + int gap; + const int nA = self->nA; + const int nB = self->nB; + Trace** M = self->M; + TraceGapsWatermanSmithBeyer** gaps = self->gaps.waterman_smith_beyer; + Py_ssize_t term; + Py_ssize_t count = MEMORY_ERROR; + Py_ssize_t total = 0; + Py_ssize_t** M_count = NULL; + Py_ssize_t** Ix_count = NULL; + Py_ssize_t** Iy_count = NULL; + M_count = PyMem_Malloc((nA+1)*sizeof(Py_ssize_t*)); + if (!M_count) goto exit; + Ix_count = PyMem_Malloc((nA+1)*sizeof(Py_ssize_t*)); + if (!Ix_count) goto exit; + Iy_count = PyMem_Malloc((nA+1)*sizeof(Py_ssize_t*)); + if (!Iy_count) goto exit; + for (i = 0; i <= nA; i++) { + M_count[i] = PyMem_Malloc((nB+1)*sizeof(Py_ssize_t)); + if (!M_count[i]) goto exit; + Ix_count[i] = 
PyMem_Malloc((nB+1)*sizeof(Py_ssize_t)); + if (!Ix_count[i]) goto exit; + Iy_count[i] = PyMem_Malloc((nB+1)*sizeof(Py_ssize_t)); + if (!Iy_count[i]) goto exit; + } + for (i = 0; i <= nA; i++) { + for (j = 0; j <= nB; j++) { + count = 0; + trace = M[i][j].trace; + if (trace & M_MATRIX) SAFE_ADD(M_count[i-1][j-1], count); + if (trace & Ix_MATRIX) SAFE_ADD(Ix_count[i-1][j-1], count); + if (trace & Iy_MATRIX) SAFE_ADD(Iy_count[i-1][j-1], count); + if (count == 0 && (trace & STARTPOINT)) count = 1; + M_count[i][j] = count; + if (M[i][j].trace & ENDPOINT) SAFE_ADD(count, total); + count = 0; + p = gaps[i][j].MIx; + if (p) { + while (1) { + gap = *p; + if (!gap) break; + SAFE_ADD(M_count[i-gap][j], count); + p++; + } + } + p = gaps[i][j].IyIx; + if (p) { + while (1) { + gap = *p; + if (!gap) break; + SAFE_ADD(Iy_count[i-gap][j], count); + p++; + } + } + Ix_count[i][j] = count; + count = 0; + p = gaps[i][j].MIy; + if (p) { + while (1) { + gap = *p; + if (!gap) break; + SAFE_ADD(M_count[i][j-gap], count); + p++; + } + } + p = gaps[i][j].IxIy; + if (p) { + while (1) { + gap = *p; + if (!gap) break; + SAFE_ADD(Ix_count[i][j-gap], count); + p++; + } + } + Iy_count[i][j] = count; + } + } + count = total; +exit: + if (M_count) { + if (Ix_count) { + if (Iy_count) { + for (i = 0; i <= nA; i++) { + if (!M_count[i]) break; + PyMem_Free(M_count[i]); + if (!Ix_count[i]) break; + PyMem_Free(Ix_count[i]); + if (!Iy_count[i]) break; + PyMem_Free(Iy_count[i]); + } + PyMem_Free(Iy_count); + } + PyMem_Free(Ix_count); + } + PyMem_Free(M_count); + } + return count; +} + +static Py_ssize_t PathGenerator_length(PathGenerator* self) { + Py_ssize_t length = self->length; + if (length == 0) { + switch (self->algorithm) { + case NeedlemanWunschSmithWaterman: + switch (self->mode) { + case Global: + length = PathGenerator_needlemanwunsch_length(self); + break; + case Local: + length = PathGenerator_smithwaterman_length(self); + break; + default: + /* should not happen, but some compilers complain that + * that length can be used uninitialized. + */ + PyErr_SetString(PyExc_RuntimeError, "Unknown mode"); + return -1; + } + break; + case Gotoh: + switch (self->mode) { + case Global: + length = PathGenerator_gotoh_global_length(self); + break; + case Local: + length = PathGenerator_gotoh_local_length(self); + break; + default: + /* should not happen, but some compilers complain that + * that length can be used uninitialized. + */ + PyErr_SetString(PyExc_RuntimeError, "Unknown mode"); + return -1; + } + break; + case WatermanSmithBeyer: + switch (self->mode) { + case Global: + length = PathGenerator_waterman_smith_beyer_global_length(self); + break; + case Local: + length = PathGenerator_waterman_smith_beyer_local_length(self); + break; + default: + /* should not happen, but some compilers complain that + * that length can be used uninitialized. 
+ */ + PyErr_SetString(PyExc_RuntimeError, "Unknown mode"); + return -1; + } + break; + case Unknown: + default: + PyErr_SetString(PyExc_RuntimeError, "Unknown algorithm"); + return -1; + } + self->length = length; + } + switch (length) { + case OVERFLOW_ERROR: + PyErr_Format(PyExc_OverflowError, + "number of optimal alignments is larger than %zd", + PY_SSIZE_T_MAX); + break; + case MEMORY_ERROR: + PyErr_SetNone(PyExc_MemoryError); + break; + default: + break; + } + return length; +} + +static void +PathGenerator_dealloc(PathGenerator* self) +{ + int i; + const int nA = self->nA; + const Algorithm algorithm = self->algorithm; + Trace** M = self->M; + if (M) { + for (i = 0; i <= nA; i++) { + if (!M[i]) break; + PyMem_Free(M[i]); + } + PyMem_Free(M); + } + switch (algorithm) { + case NeedlemanWunschSmithWaterman: + break; + case Gotoh: { + TraceGapsGotoh** gaps = self->gaps.gotoh; + if (gaps) { + for (i = 0; i <= nA; i++) { + if (!gaps[i]) break; + PyMem_Free(gaps[i]); + } + PyMem_Free(gaps); + } + break; + } + case WatermanSmithBeyer: { + TraceGapsWatermanSmithBeyer** gaps = self->gaps.waterman_smith_beyer; + if (gaps) { + int j; + const int nB = self->nB; + int* trace; + for (i = 0; i <= nA; i++) { + if (!gaps[i]) break; + for (j = 0; j <= nB; j++) { + trace = gaps[i][j].MIx; + if (trace) PyMem_Free(trace); + trace = gaps[i][j].IyIx; + if (trace) PyMem_Free(trace); + trace = gaps[i][j].MIy; + if (trace) PyMem_Free(trace); + trace = gaps[i][j].IxIy; + if (trace) PyMem_Free(trace); + } + PyMem_Free(gaps[i]); + } + PyMem_Free(gaps); + } + break; + } + case Unknown: + default: + PyErr_WriteUnraisable((PyObject*)self); + break; + } + Py_TYPE(self)->tp_free((PyObject*)self); +} + +static PyObject* PathGenerator_next_needlemanwunsch(PathGenerator* self) +{ + int i = 0; + int j = 0; + int path; + int trace = 0; + const int nA = self->nA; + const int nB = self->nB; + Trace** M = self->M; + + path = M[i][j].path; + if (path == DONE) return NULL; + if (path == 0) { + /* Generate the first path. */ + i = nA; + j = nB; + } + else { + /* We already have a path. Prune the path to see if there are + * any alternative paths. */ + while (1) { + if (path == HORIZONTAL) { + trace = M[i][++j].trace; + if (trace & VERTICAL) { + M[--i][j].path = VERTICAL; + break; + } + if (trace & DIAGONAL) { + M[--i][--j].path = DIAGONAL; + break; + } + } + else if (path == VERTICAL) { + trace = M[++i][j].trace; + if (trace & DIAGONAL) { + M[--i][--j].path = DIAGONAL; + break; + } + } + else /* DIAGONAL */ { + i++; + j++; + } + path = M[i][j].path; + if (!path) { + /* we reached the end of the alignment without finding + * an alternative path */ + M[0][0].path = DONE; + return NULL; + } + } + } + /* Follow the traceback until we reach the origin. */ + while (1) { + trace = M[i][j].trace; + if (trace & HORIZONTAL) M[i][--j].path = HORIZONTAL; + else if (trace & VERTICAL) M[--i][j].path = VERTICAL; + else if (trace & DIAGONAL) M[--i][--j].path = DIAGONAL; + else break; + } + return PathGenerator_create_path(self, 0, 0); +} + +static PyObject* PathGenerator_next_smithwaterman(PathGenerator* self) +{ + int trace = 0; + int i = self->iA; + int j = self->iB; + const int nA = self->nA; + const int nB = self->nB; + Trace** M = self->M; + int path = M[0][0].path; + + if (path == DONE || path == NONE) return NULL; + + path = M[i][j].path; + if (path) { + /* We already have a path. Prune the path to see if there are + * any alternative paths. 
*/ + while (1) { + if (path == HORIZONTAL) { + trace = M[i][++j].trace; + if (trace & VERTICAL) { + M[--i][j].path = VERTICAL; + break; + } + else if (trace & DIAGONAL) { + M[--i][--j].path = DIAGONAL; + break; + } + } + else if (path == VERTICAL) { + trace = M[++i][j].trace; + if (trace & DIAGONAL) { + M[--i][--j].path = DIAGONAL; + break; + } + } + else /* DIAGONAL */ { + i++; + j++; + } + path = M[i][j].path; + if (!path) break; + } + } + + if (path) { + trace = M[i][j].trace; + } else { + /* Find a suitable end point for a path. + * Only allow end points ending at the M matrix. */ + while (1) { + if (j < nB) j++; + else if (i < nA) { + i++; + j = 0; + } + else { + /* we reached the end of the sequences without finding + * an alternative path */ + M[0][0].path = DONE; + return NULL; + } + trace = M[i][j].trace; + if (trace & ENDPOINT) { + trace &= DIAGONAL; /* exclude paths ending in a gap */ + break; + } + } + M[i][j].path = 0; + } + + /* Follow the traceback until we reach the origin. */ + while (1) { + if (trace & HORIZONTAL) M[i][--j].path = HORIZONTAL; + else if (trace & VERTICAL) M[--i][j].path = VERTICAL; + else if (trace & DIAGONAL) M[--i][--j].path = DIAGONAL; + else if (trace & STARTPOINT) { + self->iA = i; + self->iB = j; + return PathGenerator_create_path(self, i, j); + } + else { + PyErr_SetString(PyExc_RuntimeError, + "Unexpected trace in PathGenerator_next_smithwaterman"); + return NULL; + } + trace = M[i][j].trace; + } +} + +static PyObject* PathGenerator_next_gotoh_global(PathGenerator* self) +{ + int i = 0; + int j = 0; + int m; + int path; + int trace = 0; + const int nA = self->nA; + const int nB = self->nB; + Trace** M = self->M; + TraceGapsGotoh** gaps = self->gaps.gotoh; + + m = M_MATRIX; + path = M[i][j].path; + if (path == DONE) return NULL; + if (path == 0) { + i = nA; + j = nB; + } + else { + /* We already have a path. Prune the path to see if there are + * any alternative paths. */ + while (1) { + path = M[i][j].path; + if (path == 0) { + switch (m) { + case M_MATRIX: m = Ix_MATRIX; break; + case Ix_MATRIX: m = Iy_MATRIX; break; + case Iy_MATRIX: m = 0; break; + } + break; + } + switch (path) { + case HORIZONTAL: trace = gaps[i][++j].Iy; break; + case VERTICAL: trace = gaps[++i][j].Ix; break; + case DIAGONAL: trace = M[++i][++j].trace; break; + } + switch (m) { + case M_MATRIX: + if (trace & Ix_MATRIX) { + m = Ix_MATRIX; + break; + } + case Ix_MATRIX: + if (trace & Iy_MATRIX) { + m = Iy_MATRIX; + break; + } + case Iy_MATRIX: + default: + switch (path) { + case HORIZONTAL: m = Iy_MATRIX; break; + case VERTICAL: m = Ix_MATRIX; break; + case DIAGONAL: m = M_MATRIX; break; + } + continue; + } + switch (path) { + case HORIZONTAL: j--; break; + case VERTICAL: i--; break; + case DIAGONAL: i--; j--; break; + } + M[i][j].path = path; + break; + } + } + + if (path == 0) { + /* Generate a new path. 
*/ + switch (m) { + case M_MATRIX: + if (M[nA][nB].trace) { + /* m = M_MATRIX; */ + break; + } + case Ix_MATRIX: + if (gaps[nA][nB].Ix) { + m = Ix_MATRIX; + break; + } + case Iy_MATRIX: + if (gaps[nA][nB].Iy) { + m = Iy_MATRIX; + break; + } + default: + /* exhausted this generator */ + M[0][0].path = DONE; + return NULL; + } + } + + switch (m) { + case M_MATRIX: + trace = M[i][j].trace; + path = DIAGONAL; + i--; j--; + break; + case Ix_MATRIX: + trace = gaps[i][j].Ix; + path = VERTICAL; + i--; + break; + case Iy_MATRIX: + trace = gaps[i][j].Iy; + path = HORIZONTAL; + j--; + break; + } + + while (1) { + if (trace & M_MATRIX) { + trace = M[i][j].trace; + M[i][j].path = path; + path = DIAGONAL; + i--; j--; + } + else if (trace & Ix_MATRIX) { + M[i][j].path = path; + trace = gaps[i][j].Ix; + path = VERTICAL; + i--; + } + else if (trace & Iy_MATRIX) { + M[i][j].path = path; + trace = gaps[i][j].Iy; + path = HORIZONTAL; + j--; + } + else break; + } + return PathGenerator_create_path(self, 0, 0); +} + +static PyObject* PathGenerator_next_gotoh_local(PathGenerator* self) +{ + int trace = 0; + int i; + int j; + int m = M_MATRIX; + int iA = self->iA; + int iB = self->iB; + const int nA = self->nA; + const int nB = self->nB; + Trace** M = self->M; + TraceGapsGotoh** gaps = self->gaps.gotoh; + int path = M[0][0].path; + + if (path == DONE) return NULL; + + path = M[iA][iB].path; + + if (path) { + i = iA; + j = iB; + while (1) { + /* We already have a path. Prune the path to see if there are + * any alternative paths. */ + path = M[i][j].path; + if (path == 0) { + m = M_MATRIX; + iA = i; + iB = j; + break; + } + switch (path) { + case HORIZONTAL: trace = gaps[i][++j].Iy; break; + case VERTICAL: trace = gaps[++i][j].Ix; break; + case DIAGONAL: trace = M[++i][++j].trace; break; + } + switch (m) { + case M_MATRIX: + if (trace & Ix_MATRIX) { + m = Ix_MATRIX; + break; + } + case Ix_MATRIX: + if (trace & Iy_MATRIX) { + m = Iy_MATRIX; + break; + } + case Iy_MATRIX: + default: + switch (path) { + case HORIZONTAL: m = Iy_MATRIX; break; + case VERTICAL: m = Ix_MATRIX; break; + case DIAGONAL: m = M_MATRIX; break; + } + continue; + } + switch (path) { + case HORIZONTAL: j--; break; + case VERTICAL: i--; break; + case DIAGONAL: i--; j--; break; + } + M[i][j].path = path; + break; + } + } + + if (path == 0) { + /* Find the end point for a new path. 
*/ + while (1) { + if (iB < nB) iB++; + else if (iA < nA) { + iA++; + iB = 0; + } + else { + /* we reached the end of the alignment without finding + * an alternative path */ + M[0][0].path = DONE; + return NULL; + } + if (M[iA][iB].trace & ENDPOINT) { + M[iA][iB].path = 0; + break; + } + } + m = M_MATRIX; + i = iA; + j = iB; + } + + while (1) { + switch (m) { + case M_MATRIX: trace = M[i][j].trace; break; + case Ix_MATRIX: trace = gaps[i][j].Ix; break; + case Iy_MATRIX: trace = gaps[i][j].Iy; break; + } + if (trace == STARTPOINT) { + self->iA = i; + self->iB = j; + return PathGenerator_create_path(self, i, j); + } + switch (m) { + case M_MATRIX: + path = DIAGONAL; + i--; + j--; + break; + case Ix_MATRIX: + path = VERTICAL; + i--; + break; + case Iy_MATRIX: + path = HORIZONTAL; + j--; + break; + } + if (trace & M_MATRIX) m = M_MATRIX; + else if (trace & Ix_MATRIX) m = Ix_MATRIX; + else if (trace & Iy_MATRIX) m = Iy_MATRIX; + else { + PyErr_SetString(PyExc_RuntimeError, + "Unexpected trace in PathGenerator_next_gotoh_local"); + return NULL; + } + M[i][j].path = path; + } + return NULL; +} + +static PyObject* +PathGenerator_next_waterman_smith_beyer_global(PathGenerator* self) +{ + int i = 0, j = 0; + int iA, iB; + int trace; + int* gapM; + int* gapXY; + + int m = M_MATRIX; + const int nA = self->nA; + const int nB = self->nB; + Trace** M = self->M; + TraceGapsWatermanSmithBeyer** gaps = self->gaps.waterman_smith_beyer; + + int gap; + int path = M[0][0].path; + + if (path == DONE) return NULL; + + if (path) { + /* We already have a path. Prune the path to see if there are + * any alternative paths. */ + while (1) { + if (!path) { + m <<= 1; + break; + } + switch (path) { + case HORIZONTAL: + iA = i; + iB = j; + while (M[i][iB].path == HORIZONTAL) iB++; + break; + case VERTICAL: + iA = i; + while (M[iA][j].path == VERTICAL) iA++; + iB = j; + break; + case DIAGONAL: + iA = i + 1; + iB = j + 1; + break; + default: + PyErr_SetString(PyExc_RuntimeError, + "Unexpected path in PathGenerator_next_waterman_smith_beyer_global"); + return NULL; + } + if (i == iA) { /* HORIZONTAL */ + gapM = gaps[iA][iB].MIy; + gapXY = gaps[iA][iB].IxIy; + if (m == M_MATRIX) { + gap = iB - j; + while (*gapM != gap) gapM++; + gapM++; + gap = *gapM; + if (gap) { + j = iB - gap; + while (j < iB) M[i][--iB].path = HORIZONTAL; + break; + } + } else if (m == Ix_MATRIX) { + gap = iB - j; + while (*gapXY != gap) gapXY++; + gapXY++; + } + gap = *gapXY; + if (gap) { + m = Ix_MATRIX; + j = iB - gap; + while (j < iB) M[i][--iB].path = HORIZONTAL; + break; + } + /* no alternative found; continue pruning */ + m = Iy_MATRIX; + j = iB; + } + else if (j == iB) { /* VERTICAL */ + gapM = gaps[iA][iB].MIx; + gapXY = gaps[iA][iB].IyIx; + if (m == M_MATRIX) { + gap = iA - i; + while (*gapM != gap) gapM++; + gapM++; + gap = *gapM; + if (gap) { + i = iA - gap; + while (i < iA) M[--iA][j].path = VERTICAL; + break; + } + } else if (m == Iy_MATRIX) { + gap = iA - i; + while (*gapXY != gap) gapXY++; + gapXY++; + } + gap = *gapXY; + if (gap) { + m = Iy_MATRIX; + i = iA - gap; + while (i < iA) M[--iA][j].path = VERTICAL; + break; + } + /* no alternative found; continue pruning */ + m = Ix_MATRIX; + i = iA; + } + else { /* DIAGONAL */ + i = iA - 1; + j = iB - 1; + trace = M[iA][iB].trace; + switch (m) { + case M_MATRIX: + if (trace & Ix_MATRIX) { + m = Ix_MATRIX; + M[i][j].path = DIAGONAL; + break; + } + case Ix_MATRIX: + if (trace & Iy_MATRIX) { + m = Iy_MATRIX; + M[i][j].path = DIAGONAL; + break; + } + case Iy_MATRIX: + default: + /* no alternative 
found; continue pruning */ + m = M_MATRIX; + i = iA; + j = iB; + path = M[i][j].path; + continue; + } + /* alternative found; build path until starting point */ + break; + } + path = M[i][j].path; + } + } + + if (!path) { + /* Find a suitable end point for a path. */ + switch (m) { + case M_MATRIX: + if (M[nA][nB].trace) { + /* m = M_MATRIX; */ + break; + } + case Ix_MATRIX: + if (gaps[nA][nB].MIx[0] || gaps[nA][nB].IyIx[0]) { + m = Ix_MATRIX; + break; + } + case Iy_MATRIX: + if (gaps[nA][nB].MIy[0] || gaps[nA][nB].IxIy[0]) { + m = Iy_MATRIX; + break; + } + default: + M[0][0].path = DONE; + return NULL; + } + i = nA; + j = nB; + } + + /* Follow the traceback until we reach the origin. */ + while (1) { + switch (m) { + case M_MATRIX: + trace = M[i][j].trace; + if (trace & M_MATRIX) m = M_MATRIX; + else if (trace & Ix_MATRIX) m = Ix_MATRIX; + else if (trace & Iy_MATRIX) m = Iy_MATRIX; + else return PathGenerator_create_path(self, i, j); + i--; + j--; + M[i][j].path = DIAGONAL; + break; + case Ix_MATRIX: + gap = gaps[i][j].MIx[0]; + if (gap) m = M_MATRIX; + else { + gap = gaps[i][j].IyIx[0]; + m = Iy_MATRIX; + } + iA = i - gap; + while (iA < i) M[--i][j].path = VERTICAL; + M[i][j].path = VERTICAL; + break; + case Iy_MATRIX: + gap = gaps[i][j].MIy[0]; + if (gap) m = M_MATRIX; + else { + gap = gaps[i][j].IxIy[0]; + m = Ix_MATRIX; + } + iB = j - gap; + while (iB < j) M[i][--j].path = HORIZONTAL; + M[i][j].path = HORIZONTAL; + break; + } + } +} + +static PyObject* +PathGenerator_next_waterman_smith_beyer_local(PathGenerator* self) +{ + int i, j, m; + int trace = 0; + int* gapM; + int* gapXY; + + int iA = self->iA; + int iB = self->iB; + const int nA = self->nA; + const int nB = self->nB; + Trace** M = self->M; + TraceGapsWatermanSmithBeyer** gaps = self->gaps.waterman_smith_beyer; + + int gap; + int path = M[0][0].path; + + if (path == DONE) return NULL; + m = 0; + path = M[iA][iB].path; + if (path) { + /* We already have a path. Prune the path to see if there are + * any alternative paths. 
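+ * Pruning walks forward along the stored path; at each segment (a
+ * diagonal step or a run of gap steps) it consults the trace and gap
+ * arrays for the next untried alternative. If one is found, the tail
+ * of the path is rewritten and the traceback below completes it; if
+ * none remains anywhere on the path, m is set to 0 and a new end
+ * point is sought instead.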
*/ + m = M_MATRIX; + i = iA; + j = iB; + while (1) { + path = M[i][j].path; + switch (path) { + case HORIZONTAL: + iA = i; + iB = j; + while (M[i][iB].path == HORIZONTAL) iB++; + break; + case VERTICAL: + iA = i; + iB = j; + while (M[iA][j].path == VERTICAL) iA++; + break; + case DIAGONAL: + iA = i + 1; + iB = j + 1; + break; + default: + iA = -1; + break; + } + if (iA < 0) { + m = 0; + iA = i; + iB = j; + break; + } + if (i == iA) { /* HORIZONTAL */ + gapM = gaps[iA][iB].MIy; + gapXY = gaps[iA][iB].IxIy; + if (m == M_MATRIX) { + gap = iB - j; + while (*gapM != gap) gapM++; + gapM++; + gap = *gapM; + if (gap) { + j = iB - gap; + while (j < iB) M[i][--iB].path = HORIZONTAL; + break; + } + } else if (m == Ix_MATRIX) { + gap = iB - j; + while (*gapXY != gap) gapXY++; + gapXY++; + } + gap = *gapXY; + if (gap) { + m = Ix_MATRIX; + j = iB - gap; + M[i][j].path = HORIZONTAL; + while (iB > j) M[i][--iB].path = HORIZONTAL; + break; + } + /* no alternative found; continue pruning */ + m = Iy_MATRIX; + j = iB; + } + else if (j == iB) { /* VERTICAL */ + gapM = gaps[iA][iB].MIx; + gapXY = gaps[iA][iB].IyIx; + if (m == M_MATRIX) { + gap = iA - i; + while (*gapM != gap) gapM++; + gapM++; + gap = *gapM; + if (gap) { + i = iA - gap; + while (i < iA) M[--iA][j].path = VERTICAL; + break; + } + } else if (m == Iy_MATRIX) { + gap = iA - i; + while (*gapXY != gap) gapXY++; + gapXY++; + } + gap = *gapXY; + if (gap) { + m = Iy_MATRIX; + i = iA - gap; + M[i][j].path = VERTICAL; + while (iA > i) M[--iA][j].path = VERTICAL; + break; + } + /* no alternative found; continue pruning */ + m = Ix_MATRIX; + i = iA; + } + else { /* DIAGONAL */ + i = iA - 1; + j = iB - 1; + trace = M[iA][iB].trace; + switch (m) { + case M_MATRIX: + if (trace & Ix_MATRIX) { + m = Ix_MATRIX; + M[i][j].path = DIAGONAL; + break; + } + case Ix_MATRIX: + if (trace & Iy_MATRIX) { + m = Iy_MATRIX; + M[i][j].path = DIAGONAL; + break; + } + case Iy_MATRIX: + default: + /* no alternative found; continue pruning */ + m = M_MATRIX; + i = iA; + j = iB; + continue; + } + /* alternative found; build path until starting point */ + break; + } + } + } + + if (m == 0) { + /* We are at [nA][nB]. Find a suitable end point for a path. */ + while (1) { + if (iB < nB) iB++; + else if (iA < nA) { + iA++; + iB = 0; + } + else { + /* exhausted this generator */ + M[0][0].path = DONE; + return NULL; + } + if (M[iA][iB].trace & ENDPOINT) break; + } + M[iA][iB].path = 0; + m = M_MATRIX; + i = iA; + j = iB; + } + + /* Follow the traceback until we reach the origin. 
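+ * Each gap array (MIx, IyIx, MIy, IxIy) is a zero-terminated list of
+ * candidate gap lengths for this cell; the traceback below always
+ * takes entry 0, and the pruning pass above steps through the
+ * remaining entries on subsequent calls.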
*/ + while (1) { + switch (m) { + case Ix_MATRIX: + gapM = gaps[i][j].MIx; + gapXY = gaps[i][j].IyIx; + iB = j; + gap = *gapM; + if (gap) m = M_MATRIX; + else { + gap = *gapXY; + m = Iy_MATRIX; + } + iA = i - gap; + while (i > iA) M[--i][iB].path = VERTICAL; + break; + case Iy_MATRIX: + gapM = gaps[i][j].MIy; + gapXY = gaps[i][j].IxIy; + iA = i; + gap = *gapM; + if (gap) m = M_MATRIX; + else { + gap = *gapXY; + m = Ix_MATRIX; + } + iB = j - gap; + while (j > iB) M[iA][--j].path = HORIZONTAL; + break; + case M_MATRIX: + iA = i-1; + iB = j-1; + trace = M[i][j].trace; + if (trace & M_MATRIX) m = M_MATRIX; + else if (trace & Ix_MATRIX) m = Ix_MATRIX; + else if (trace & Iy_MATRIX) m = Iy_MATRIX; + else if (trace == STARTPOINT) { + self->iA = i; + self->iB = j; + return PathGenerator_create_path(self, i, j); + } + else { + PyErr_SetString(PyExc_RuntimeError, + "Unexpected trace in PathGenerator_next_waterman_smith_beyer_local"); + return NULL; + } + M[iA][iB].path = DIAGONAL; + break; + } + i = iA; + j = iB; + } +} + +static PyObject * +PathGenerator_next(PathGenerator* self) +{ + const Mode mode = self->mode; + const Algorithm algorithm = self->algorithm; + switch (algorithm) { + case NeedlemanWunschSmithWaterman: + switch (mode) { + case Global: + return PathGenerator_next_needlemanwunsch(self); + case Local: + return PathGenerator_next_smithwaterman(self); + } + case Gotoh: + switch (mode) { + case Global: + return PathGenerator_next_gotoh_global(self); + case Local: + return PathGenerator_next_gotoh_local(self); + } + case WatermanSmithBeyer: + switch (mode) { + case Global: + return PathGenerator_next_waterman_smith_beyer_global(self); + case Local: + return PathGenerator_next_waterman_smith_beyer_local(self); + } + case Unknown: + default: + PyErr_SetString(PyExc_RuntimeError, "Unknown algorithm"); + return NULL; + } +} + +static const char PathGenerator_reset__doc__[] = "reset the iterator"; + +static PyObject* +PathGenerator_reset(PathGenerator* self) +{ + switch (self->mode) { + case Local: + self->iA = 0; + self->iB = 0; + case Global: { + Trace** M = self->M; + switch (self->algorithm) { + case NeedlemanWunschSmithWaterman: + case Gotoh: { + if (M[0][0].path != NONE) M[0][0].path = 0; + break; + } + case WatermanSmithBeyer: { + M[0][0].path = 0; + break; + } + case Unknown: + default: + break; + } + } + } + Py_INCREF(Py_None); + return Py_None; +} + +static PyMethodDef PathGenerator_methods[] = { + {"reset", + (PyCFunction)PathGenerator_reset, + METH_NOARGS, + PathGenerator_reset__doc__ + }, + {NULL} /* Sentinel */ +}; + +static PySequenceMethods PathGenerator_as_sequence = { + (lenfunc)PathGenerator_length, /* sq_length */ + NULL, /* sq_concat */ + NULL, /* sq_repeat */ + NULL, /* sq_item */ + NULL, /* sq_ass_item */ + NULL, /* sq_contains */ + NULL, /* sq_inplace_concat */ + NULL, /* sq_inplace_repeat */ +}; + +static PyTypeObject PathGenerator_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + "Path generator", /* tp_name */ + sizeof(PathGenerator), /* tp_basicsize */ + 0, /* tp_itemsize */ + (destructor)PathGenerator_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_reserved */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + &PathGenerator_as_sequence, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare 
*/ + 0, /* tp_weaklistoffset */ + PyObject_SelfIter, /* tp_iter */ + (iternextfunc)PathGenerator_next, /* tp_iternext */ + PathGenerator_methods, /* tp_methods */ +}; + +typedef struct { + PyObject_HEAD + Mode mode; + Algorithm algorithm; + double match; + double mismatch; + double epsilon; + double target_internal_open_gap_score; + double target_internal_extend_gap_score; + double target_left_open_gap_score; + double target_left_extend_gap_score; + double target_right_open_gap_score; + double target_right_extend_gap_score; + double query_internal_open_gap_score; + double query_internal_extend_gap_score; + double query_left_open_gap_score; + double query_left_extend_gap_score; + double query_right_open_gap_score; + double query_right_extend_gap_score; + PyObject* target_gap_function; + PyObject* query_gap_function; + Py_buffer substitution_matrix; + PyObject* alphabet; + int* mapping; + int wildcard; +} Aligner; + + +static Py_ssize_t +set_alphabet(Aligner* self, PyObject* alphabet) +{ + Py_ssize_t size; + if (alphabet == Py_None) { + if (self->alphabet) { + Py_DECREF(self->alphabet); + self->alphabet = NULL; + } + if (self->mapping) { + PyMem_Free(self->mapping); + self->mapping = NULL; + } + return 0; + } + else if (PyUnicode_Check(alphabet)) { + int* mapping; + int i; + int n; + int kind; + void* characters; + if (PyUnicode_READY(alphabet) == -1) return -1; + size = PyUnicode_GET_LENGTH(alphabet); + if (size == 0) { + PyErr_SetString(PyExc_ValueError, "alphabet has zero length"); + return -1; + } + kind = PyUnicode_KIND(alphabet); + switch (kind) { + case PyUnicode_1BYTE_KIND: { + n = 1 << 8 * sizeof(Py_UCS1); + break; + } + case PyUnicode_2BYTE_KIND: { + n = 1 << 8 * sizeof(Py_UCS2); + break; + } + case PyUnicode_4BYTE_KIND: { + n = 0x110000; /* Maximum code point in Unicode 6.0 + * is 0x10ffff = 1114111 */ + break; + } + case PyUnicode_WCHAR_KIND: + default: + PyErr_SetString(PyExc_ValueError, "could not interpret alphabet"); + return -1; + } + characters = PyUnicode_DATA(alphabet); + mapping = PyMem_Malloc(n*sizeof(int)); + if (!mapping) return -1; + for (i = 0; i < n; i++) mapping[i] = MISSING_LETTER; + for (i = 0; i < size; i++) { + Py_UCS4 character = PyUnicode_READ(kind, characters, i); + if (mapping[character] != MISSING_LETTER) { + PyObject* c = PyUnicode_FromKindAndData(kind, &character, 1); + PyErr_Format(PyExc_ValueError, + "alphabet contains '%S' more than once", c); + Py_XDECREF(c); + PyMem_Free(mapping); + return -1; + } + mapping[character] = i; + } + Py_INCREF(alphabet); + if (self->mapping) PyMem_Free(self->mapping); + self->mapping = mapping; + } + else { + /* alphabet is not a string; cannot use mapping */ + PyObject* sequence = PySequence_Fast(alphabet, + "alphabet should support the sequence protocol (e.g.,\n" + "strings, lists, and tuples can be valid alphabets)."); + if (!sequence) return -1; + size = PySequence_Fast_GET_SIZE(sequence); + Py_DECREF(sequence); + if (self->mapping) { + PyMem_Free(self->mapping); + self->mapping = NULL; + } + Py_INCREF(alphabet); + } + Py_XDECREF(self->alphabet); + self->alphabet = alphabet; + return size; +} + +static int +Aligner_init(Aligner *self, PyObject *args, PyObject *kwds) +{ + self->mode = Global; + self->match = 1.0; + self->mismatch = 0.0; + self->epsilon = 1.e-6; + self->target_internal_open_gap_score = 0; + self->target_internal_extend_gap_score = 0; + self->query_internal_open_gap_score = 0; + self->query_internal_extend_gap_score = 0; + self->target_left_open_gap_score = 0; + self->target_left_extend_gap_score = 0; 
+    self->target_right_open_gap_score = 0;
+    self->target_right_extend_gap_score = 0;
+    self->query_left_open_gap_score = 0;
+    self->query_left_extend_gap_score = 0;
+    self->query_right_open_gap_score = 0;
+    self->query_right_extend_gap_score = 0;
+    self->target_gap_function = NULL;
+    self->query_gap_function = NULL;
+    self->substitution_matrix.obj = NULL;
+    self->substitution_matrix.buf = NULL;
+    self->algorithm = Unknown;
+    self->alphabet = NULL;
+    self->mapping = NULL;
+    self->wildcard = -1;
+    return 0;
+}
+
+static void
+Aligner_dealloc(Aligner* self)
+{   Py_XDECREF(self->target_gap_function);
+    Py_XDECREF(self->query_gap_function);
+    if (self->substitution_matrix.obj) PyBuffer_Release(&self->substitution_matrix);
+    Py_XDECREF(self->alphabet);
+    /* mapping is a PyMem_Malloc'ed int array, not a Python object,
+     * so it must be freed rather than DECREF'ed. */
+    if (self->mapping) PyMem_Free(self->mapping);
+    Py_TYPE(self)->tp_free((PyObject*)self);
+}
+
+static PyObject*
+Aligner_repr(Aligner* self)
+{
+    const char text[] = "Pairwise aligner, implementing the Needleman-Wunsch, Smith-Waterman, Gotoh, and Waterman-Smith-Beyer global and local alignment algorithms";
+    return PyUnicode_FromString(text);
+}
+
+static PyObject*
+Aligner_str(Aligner* self)
+{
+    char text[1024];
+    char* p = text;
+    PyObject* substitution_matrix = self->substitution_matrix.obj;
+    void* args[3] = {NULL, NULL, NULL};
+    int n = 0;
+    PyObject* wildcard = NULL;
+    PyObject* s;
+
+    p += sprintf(p, "Pairwise sequence aligner with parameters\n");
+    if (substitution_matrix) {
+        p += sprintf(p, "  substitution_matrix: <%s object at %p>\n",
+                     Py_TYPE(substitution_matrix)->tp_name,
+                     substitution_matrix);
+    } else {
+        if (self->wildcard == -1) {
+            p += sprintf(p, "  wildcard: None\n");
+        }
+        else {
+            wildcard = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
+                                                 &self->wildcard, 1);
+            if (!wildcard) return NULL;
+            p += sprintf(p, "  wildcard: '%%U'\n");
+            args[n++] = wildcard;
+        }
+        p += sprintf(p, "  match_score: %f\n", self->match);
+        p += sprintf(p, "  mismatch_score: %f\n", self->mismatch);
+    }
+    if (self->target_gap_function) {
+        p += sprintf(p, "  target_gap_function: %%R\n");
+        args[n++] = self->target_gap_function;
+    }
+    else {
+        p += sprintf(p, "  target_internal_open_gap_score: %f\n",
+                     self->target_internal_open_gap_score);
+        p += sprintf(p, "  target_internal_extend_gap_score: %f\n",
+                     self->target_internal_extend_gap_score);
+        p += sprintf(p, "  target_left_open_gap_score: %f\n",
+                     self->target_left_open_gap_score);
+        p += sprintf(p, "  target_left_extend_gap_score: %f\n",
+                     self->target_left_extend_gap_score);
+        p += sprintf(p, "  target_right_open_gap_score: %f\n",
+                     self->target_right_open_gap_score);
+        p += sprintf(p, "  target_right_extend_gap_score: %f\n",
+                     self->target_right_extend_gap_score);
+    }
+    if (self->query_gap_function) {
+        p += sprintf(p, "  query_gap_function: %%R\n");
+        args[n++] = self->query_gap_function;
+    }
+    else {
+        p += sprintf(p, "  query_internal_open_gap_score: %f\n",
+                     self->query_internal_open_gap_score);
+        p += sprintf(p, "  query_internal_extend_gap_score: %f\n",
+                     self->query_internal_extend_gap_score);
+        p += sprintf(p, "  query_left_open_gap_score: %f\n",
+                     self->query_left_open_gap_score);
+        p += sprintf(p, "  query_left_extend_gap_score: %f\n",
+                     self->query_left_extend_gap_score);
+        p += sprintf(p, "  query_right_open_gap_score: %f\n",
+                     self->query_right_open_gap_score);
+        p += sprintf(p, "  query_right_extend_gap_score: %f\n",
+                     self->query_right_extend_gap_score);
+    }
+    switch (self->mode) {
+        case Global: sprintf(p, "  mode: global\n"); break;
+        case Local: sprintf(p, "  mode: local\n"); break;
+    }
+    s = PyUnicode_FromFormat(text, args[0], args[1],
                             args[2]);
+    Py_XDECREF(wildcard);
+    return s;
+}
+
+static char Aligner_mode__doc__[] = "alignment mode ('global' or 'local')";
+
+static PyObject*
+Aligner_get_mode(Aligner* self, void* closure)
+{   const char* message = NULL;
+    switch (self->mode) {
+        case Global: message = "global"; break;
+        case Local: message = "local"; break;
+    }
+    return PyUnicode_FromString(message);
+}
+
+static int
+Aligner_set_mode(Aligner* self, PyObject* value, void* closure)
+{
+    if (PyUnicode_Check(value)) {
+        if (PyUnicode_CompareWithASCIIString(value, "global") == 0) {
+            self->mode = Global;
+            return 0;
+        }
+        if (PyUnicode_CompareWithASCIIString(value, "local") == 0) {
+            self->mode = Local;
+            return 0;
+        }
+    }
+    PyErr_SetString(PyExc_ValueError,
+                    "invalid mode (expected 'global' or 'local')");
+    return -1;
+}
+
+static char Aligner_match_score__doc__[] = "match score";
+
+static PyObject*
+Aligner_get_match_score(Aligner* self, void* closure)
+{   if (self->substitution_matrix.obj) {
+        Py_INCREF(Py_None);
+        return Py_None;
+    }
+    return PyFloat_FromDouble(self->match);
+}
+
+static int
+Aligner_set_match_score(Aligner* self, PyObject* value, void* closure)
+{
+    const double match = PyFloat_AsDouble(value);
+    if (PyErr_Occurred()) {
+        PyErr_SetString(PyExc_ValueError, "invalid match score");
+        return -1;
+    }
+    if (self->substitution_matrix.obj) {
+        if (set_alphabet(self, Py_None) < 0) return -1;
+        PyBuffer_Release(&self->substitution_matrix);
+    }
+    self->match = match;
+    return 0;
+}
+
+static char Aligner_mismatch_score__doc__[] = "mismatch score";
+
+static PyObject*
+Aligner_get_mismatch_score(Aligner* self, void* closure)
+{   if (self->substitution_matrix.obj) {
+        Py_INCREF(Py_None);
+        return Py_None;
+    }
+    return PyFloat_FromDouble(self->mismatch);
+}
+
+static int
+Aligner_set_mismatch_score(Aligner* self, PyObject* value, void* closure)
+{
+    const double mismatch = PyFloat_AsDouble(value);
+    if (PyErr_Occurred()) {
+        PyErr_SetString(PyExc_ValueError, "invalid mismatch score");
+        return -1;
+    }
+    if (self->substitution_matrix.obj) {
+        if (set_alphabet(self, Py_None) < 0) return -1;
+        PyBuffer_Release(&self->substitution_matrix);
+    }
+    self->mismatch = mismatch;
+    return 0;
+}
+
+static char Aligner_substitution_matrix__doc__[] = "substitution_matrix";
+
+static PyObject*
+Aligner_get_substitution_matrix(Aligner* self, void* closure)
+{   PyObject* object = self->substitution_matrix.obj;
+    if (!object) object = Py_None;
+    Py_INCREF(object);
+    return object;
+}
+
+static int
+Aligner_set_substitution_matrix(Aligner* self, PyObject* values, void* closure)
+{
+    PyObject* alphabet;
+    Py_ssize_t size = -1;
+    Py_buffer view;
+    const int flag = PyBUF_FORMAT | PyBUF_ND;
+    if (values == Py_None) {
+        if (self->substitution_matrix.obj)
+            PyBuffer_Release(&self->substitution_matrix);
+        return 0;
+    }
+    if (PyObject_GetBuffer(values, &view, flag) != 0) {
+        PyErr_SetString(PyExc_ValueError, "expected a matrix");
+        return -1;
+    }
+    if (view.ndim != 2) {
+        PyErr_Format(PyExc_ValueError,
+                     "substitution matrix has incorrect rank (%d, expected 2)",
+                     view.ndim);
+        PyBuffer_Release(&view);
+        return -1;
+    }
+    if (view.len == 0) {
+        PyErr_SetString(PyExc_ValueError, "substitution matrix has zero size");
+        PyBuffer_Release(&view);
+        return -1;
+    }
+    if (strcmp(view.format, "d") != 0) {
+        PyErr_SetString(PyExc_ValueError,
+                        "substitution matrix should contain float values");
+        PyBuffer_Release(&view);
+        return -1;
+    }
+    if (view.itemsize != sizeof(double)) {
+        PyErr_Format(PyExc_RuntimeError,
+                     "substitution matrix has unexpected 
item byte size " + "(%zd, expected %zd)", view.itemsize, sizeof(double)); + PyBuffer_Release(&view); + return -1; + } + if (view.shape[0] != view.shape[1]) { + PyErr_Format(PyExc_ValueError, + "substitution matrix should be square " + "(found a %zd x %zd matrix)", + view.shape[0], view.shape[1]); + PyBuffer_Release(&view); + return -1; + } + alphabet = PyObject_GetAttrString(values, "alphabet"); + if (alphabet) { + size = set_alphabet(self, alphabet); + Py_DECREF(alphabet); + } else { + /* Set a substitution matrix without setting an alphabet; useful + * when aligning integers. */ + PyErr_Clear(); + size = set_alphabet(self, Py_None); + } + if (size < 0) { + PyBuffer_Release(&view); + return -1; + } + if (self->substitution_matrix.obj) PyBuffer_Release(&self->substitution_matrix); + self->substitution_matrix = view; + return 0; +} + +static char Aligner_alphabet__doc__[] = "alphabet"; + +static PyObject* +Aligner_get_alphabet(Aligner* self, void* closure) +{ PyObject* object = self->alphabet; + if (!object) object = Py_None; + Py_INCREF(object); + return object; +} + +static int +Aligner_set_alphabet(Aligner* self, PyObject* alphabet, void* closure) +{ + if (self->substitution_matrix.obj) { + PyErr_SetString(PyExc_AttributeError, + "can't set alphabet if a substitution matrix is used"); + return -1; + } + if (set_alphabet(self, alphabet) < 0) return -1; + return 0; +} + +static char Aligner_gap_score__doc__[] = "gap score"; + +static PyObject* +Aligner_get_gap_score(Aligner* self, void* closure) +{ + if (self->target_gap_function || self->query_gap_function) { + if (self->target_gap_function != self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + Py_INCREF(self->target_gap_function); + return self->target_gap_function; + } + else { + const double score = self->target_internal_open_gap_score; + if (score != self->target_internal_extend_gap_score + || score != self->target_left_open_gap_score + || score != self->target_left_extend_gap_score + || score != self->target_right_open_gap_score + || score != self->target_right_extend_gap_score + || score != self->query_internal_open_gap_score + || score != self->query_internal_extend_gap_score + || score != self->query_left_open_gap_score + || score != self->query_left_extend_gap_score + || score != self->query_right_open_gap_score + || score != self->query_right_extend_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + +static int +Aligner_set_gap_score(Aligner* self, PyObject* value, void* closure) +{ if (PyCallable_Check(value)) { + Py_XDECREF(self->target_gap_function); + Py_XDECREF(self->query_gap_function); + Py_INCREF(value); + Py_INCREF(value); + self->target_gap_function = value; + self->query_gap_function = value; + } + else { + const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->target_internal_open_gap_score = score; + self->target_internal_extend_gap_score = score; + self->target_left_open_gap_score = score; + self->target_left_extend_gap_score = score; + self->target_right_open_gap_score = score; + self->target_right_extend_gap_score = score; + self->query_internal_open_gap_score = score; + 
self->query_internal_extend_gap_score = score; + self->query_left_open_gap_score = score; + self->query_left_extend_gap_score = score; + self->query_right_open_gap_score = score; + self->query_right_extend_gap_score = score; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_open_gap_score__doc__[] = "internal and end open gap score"; + +static PyObject* +Aligner_get_open_gap_score(Aligner* self, void* closure) +{ + if (self->target_gap_function || self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->target_internal_open_gap_score; + if (score != self->target_left_open_gap_score + || score != self->target_right_open_gap_score + || score != self->query_internal_open_gap_score + || score != self->query_left_open_gap_score + || score != self->query_right_open_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + +static int +Aligner_set_open_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->target_internal_open_gap_score = score; + self->target_left_open_gap_score = score; + self->target_right_open_gap_score = score; + self->query_internal_open_gap_score = score; + self->query_left_open_gap_score = score; + self->query_right_open_gap_score = score; + self->algorithm = Unknown; + return 0; +} + +static char Aligner_extend_gap_score__doc__[] = "extend gap score"; + +static PyObject* +Aligner_get_extend_gap_score(Aligner* self, void* closure) +{ + if (self->target_gap_function || self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->target_internal_extend_gap_score; + if (score != self->target_left_extend_gap_score + || score != self->target_right_extend_gap_score + || score != self->query_internal_extend_gap_score + || score != self->query_left_extend_gap_score + || score != self->query_right_extend_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + +static int +Aligner_set_extend_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->target_internal_extend_gap_score = score; + self->target_left_extend_gap_score = score; + self->target_right_extend_gap_score = score; + self->query_internal_extend_gap_score = score; + self->query_left_extend_gap_score = score; + self->query_right_extend_gap_score = score; + self->algorithm = Unknown; + return 0; +} + +static char Aligner_internal_gap_score__doc__[] = "internal gap score"; + +static PyObject* +Aligner_get_internal_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function || self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double 
score = self->target_internal_open_gap_score; + if (score != self->target_internal_extend_gap_score + || score != self->query_internal_open_gap_score + || score != self->query_internal_extend_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + +static int +Aligner_set_internal_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->target_internal_open_gap_score = score; + self->target_internal_extend_gap_score = score; + self->query_internal_open_gap_score = score; + self->query_internal_extend_gap_score = score; + self->algorithm = Unknown; + return 0; +} + +static char Aligner_internal_open_gap_score__doc__[] = "internal open gap score"; + +static PyObject* +Aligner_get_internal_open_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function || self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->target_internal_open_gap_score; + if (score != self->query_internal_open_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + +static int +Aligner_set_internal_open_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->target_internal_open_gap_score = score; + self->query_internal_open_gap_score = score; + self->algorithm = Unknown; + return 0; +} + +static char Aligner_internal_extend_gap_score__doc__[] = "internal extend gap score"; + +static PyObject* +Aligner_get_internal_extend_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function || self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->target_internal_extend_gap_score; + if (score != self->query_internal_extend_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + +static int +Aligner_set_internal_extend_gap_score(Aligner* self, PyObject* value, + void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->target_internal_extend_gap_score = score; + self->query_internal_extend_gap_score = score; + self->algorithm = Unknown; + return 0; +} + +static char Aligner_end_gap_score__doc__[] = "end gap score"; + +static PyObject* +Aligner_get_end_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function || self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double 
score = self->target_left_open_gap_score; + if (score != self->target_left_extend_gap_score + || score != self->target_right_open_gap_score + || score != self->target_right_extend_gap_score + || score != self->query_left_open_gap_score + || score != self->query_left_extend_gap_score + || score != self->query_right_open_gap_score + || score != self->query_right_extend_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + +static int +Aligner_set_end_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->target_left_open_gap_score = score; + self->target_left_extend_gap_score = score; + self->target_right_open_gap_score = score; + self->target_right_extend_gap_score = score; + self->query_left_open_gap_score = score; + self->query_left_extend_gap_score = score; + self->query_right_open_gap_score = score; + self->query_right_extend_gap_score = score; + self->algorithm = Unknown; + return 0; +} + +static char Aligner_end_open_gap_score__doc__[] = "end open gap score"; + +static PyObject* +Aligner_get_end_open_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function || self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->target_left_open_gap_score; + if (score != self->target_right_open_gap_score + || score != self->query_left_open_gap_score + || score != self->query_right_open_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + +static int +Aligner_set_end_open_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->target_left_open_gap_score = score; + self->target_right_open_gap_score = score; + self->query_left_open_gap_score = score; + self->query_right_open_gap_score = score; + self->algorithm = Unknown; + return 0; +} + +static char Aligner_end_extend_gap_score__doc__[] = "end extend gap score"; + +static PyObject* +Aligner_get_end_extend_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function || self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->target_left_extend_gap_score; + if (score != self->target_right_extend_gap_score + || score != self->query_left_extend_gap_score + || score != self->query_right_extend_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + +static int +Aligner_set_end_extend_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + if 
(self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->target_left_extend_gap_score = score; + self->target_right_extend_gap_score = score; + self->query_left_extend_gap_score = score; + self->query_right_extend_gap_score = score; + self->algorithm = Unknown; + return 0; +} + +static char Aligner_left_gap_score__doc__[] = "left gap score"; + +static PyObject* +Aligner_get_left_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function || self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->target_left_open_gap_score; + if (score != self->target_left_extend_gap_score + || score != self->query_left_open_gap_score + || score != self->query_left_extend_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + +static int +Aligner_set_left_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->target_left_open_gap_score = score; + self->target_left_extend_gap_score = score; + self->query_left_open_gap_score = score; + self->query_left_extend_gap_score = score; + self->algorithm = Unknown; + return 0; +} + +static char Aligner_right_gap_score__doc__[] = "right gap score"; + +static PyObject* +Aligner_get_right_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function || self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->target_right_open_gap_score; + if (score != self->target_right_extend_gap_score + || score != self->query_right_open_gap_score + || score != self->query_right_extend_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + +static int +Aligner_set_right_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->target_right_open_gap_score = score; + self->target_right_extend_gap_score = score; + self->query_right_open_gap_score = score; + self->query_right_extend_gap_score = score; + self->algorithm = Unknown; + return 0; +} + +static char Aligner_left_open_gap_score__doc__[] = "left open gap score"; + +static PyObject* +Aligner_get_left_open_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function || self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->target_left_open_gap_score; + if (score != self->query_left_open_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + +static int +Aligner_set_left_open_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = 
PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->target_left_open_gap_score = score; + self->query_left_open_gap_score = score; + self->algorithm = Unknown; + return 0; +} + +static char Aligner_left_extend_gap_score__doc__[] = "left extend gap score"; + +static PyObject* +Aligner_get_left_extend_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function || self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->target_left_extend_gap_score; + if (score != self->query_left_extend_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + +static int +Aligner_set_left_extend_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->target_left_extend_gap_score = score; + self->query_left_extend_gap_score = score; + self->algorithm = Unknown; + return 0; +} + +static char Aligner_right_open_gap_score__doc__[] = "right open gap score"; + +static PyObject* +Aligner_get_right_open_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function || self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->target_right_open_gap_score; + if (score != self->query_right_open_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + +static int +Aligner_set_right_open_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->target_right_open_gap_score = score; + self->query_right_open_gap_score = score; + self->algorithm = Unknown; + return 0; +} + +static char Aligner_right_extend_gap_score__doc__[] = "right extend gap score"; + +static PyObject* +Aligner_get_right_extend_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function || self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->target_right_extend_gap_score; + if (score != self->query_right_extend_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + +static int +Aligner_set_right_extend_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + if (self->query_gap_function) { + 
Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->target_right_extend_gap_score = score; + self->query_right_extend_gap_score = score; + self->algorithm = Unknown; + return 0; +} + +static char Aligner_target_open_gap_score__doc__[] = "target open gap score"; + +static PyObject* +Aligner_get_target_open_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->target_internal_open_gap_score; + if (score != self->target_left_open_gap_score + || score != self->target_right_open_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + +static int +Aligner_set_target_open_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->target_internal_open_gap_score = score; + self->target_left_open_gap_score = score; + self->target_right_open_gap_score = score; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_target_extend_gap_score__doc__[] = "target extend gap score"; + +static PyObject* +Aligner_get_target_extend_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->target_internal_extend_gap_score; + if (score != self->target_left_extend_gap_score + || score != self->target_right_extend_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + +static int +Aligner_set_target_extend_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->target_internal_extend_gap_score = score; + self->target_left_extend_gap_score = score; + self->target_right_extend_gap_score = score; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_target_gap_score__doc__[] = "target gap score"; + +static PyObject* +Aligner_get_target_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function) { + Py_INCREF(self->target_gap_function); + return self->target_gap_function; + } + else { + const double score = self->target_internal_open_gap_score; + if (score != self->target_internal_extend_gap_score + || score != self->target_left_open_gap_score + || score != self->target_left_extend_gap_score + || score != self->target_right_open_gap_score + || score != self->target_right_extend_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + +static int +Aligner_set_target_gap_score(Aligner* self, PyObject* value, void* closure) +{ + if (PyCallable_Check(value)) { + Py_XDECREF(self->target_gap_function); + Py_INCREF(value); + self->target_gap_function = value; + } + else { + const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, + "gap score should be numerical or callable"); + return -1; + } + self->target_internal_open_gap_score = score; + 
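+    /* Like every score setter in this file, this assignment ends by
+     * resetting self->algorithm to Unknown (see below), so that the
+     * appropriate algorithm (Needleman-Wunsch/Smith-Waterman, Gotoh,
+     * or Waterman-Smith-Beyer) is selected afresh the next time an
+     * alignment is computed. */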
self->target_internal_extend_gap_score = score; + self->target_left_open_gap_score = score; + self->target_left_extend_gap_score = score; + self->target_right_open_gap_score = score; + self->target_right_extend_gap_score = score; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_query_open_gap_score__doc__[] = "query gap open score"; + +static PyObject* +Aligner_get_query_open_gap_score(Aligner* self, void* closure) +{ if (self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->query_internal_open_gap_score; + if (score != self->query_left_open_gap_score + || score != self->query_right_open_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + +static int +Aligner_set_query_open_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->query_internal_open_gap_score = score; + self->query_left_open_gap_score = score; + self->query_right_open_gap_score = score; + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_query_extend_gap_score__doc__[] = "query gap extend score"; + +static PyObject* +Aligner_get_query_extend_gap_score(Aligner* self, void* closure) +{ if (self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->query_internal_extend_gap_score; + if (score != self->query_left_extend_gap_score + || score != self->query_right_extend_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + +static int +Aligner_set_query_extend_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->query_internal_extend_gap_score = score; + self->query_left_extend_gap_score = score; + self->query_right_extend_gap_score = score; + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_query_gap_score__doc__[] = "query gap score"; + +static PyObject* +Aligner_get_query_gap_score(Aligner* self, void* closure) +{ if (self->query_gap_function) { + Py_INCREF(self->query_gap_function); + return self->query_gap_function; + } + else { + const double score = self->query_internal_open_gap_score; + if (score != self->query_left_open_gap_score + || score != self->query_right_open_gap_score + || score != self->query_internal_extend_gap_score + || score != self->query_left_extend_gap_score + || score != self->query_right_extend_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + +static int +Aligner_set_query_gap_score(Aligner* self, PyObject* value, void* closure) +{ if (PyCallable_Check(value)) { + Py_XDECREF(self->query_gap_function); + Py_INCREF(value); + self->query_gap_function = value; + } + else { + const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, + 
"gap score should be numerical or callable"); + return -1; + } + self->query_internal_open_gap_score = score; + self->query_internal_extend_gap_score = score; + self->query_left_open_gap_score = score; + self->query_left_extend_gap_score = score; + self->query_right_open_gap_score = score; + self->query_right_extend_gap_score = score; + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_target_internal_open_gap_score__doc__[] = "target internal open gap score"; + +static PyObject* +Aligner_get_target_internal_open_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + return PyFloat_FromDouble(self->target_internal_open_gap_score); +} + +static int +Aligner_set_target_internal_open_gap_score(Aligner* self, + PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->target_internal_open_gap_score = score; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_target_internal_extend_gap_score__doc__[] = "target internal extend gap score"; + +static PyObject* +Aligner_get_target_internal_extend_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + return PyFloat_FromDouble(self->target_internal_extend_gap_score); +} + +static int +Aligner_set_target_internal_extend_gap_score(Aligner* self, + PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->target_internal_extend_gap_score = score; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_target_internal_gap_score__doc__[] = "target internal gap score"; + +static PyObject* +Aligner_get_target_internal_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->target_internal_open_gap_score; + if (score != self->target_internal_extend_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + +static int +Aligner_set_target_internal_gap_score(Aligner* self, PyObject* value, + void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->target_internal_open_gap_score = score; + self->target_internal_extend_gap_score = score; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_target_end_gap_score__doc__[] = "target end gap score"; + +static PyObject* +Aligner_get_target_end_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->target_left_open_gap_score; + if (score != self->target_left_extend_gap_score + || score != self->target_right_open_gap_score + || score != 
self->target_right_extend_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + +static int +Aligner_set_target_end_gap_score(Aligner* self, PyObject* value, void* closure) { + const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->target_left_open_gap_score = score; + self->target_left_extend_gap_score = score; + self->target_right_open_gap_score = score; + self->target_right_extend_gap_score = score; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_target_end_open_gap_score__doc__[] = "target end open gap score"; + +static PyObject* +Aligner_get_target_end_open_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->target_left_open_gap_score; + if (score != self->target_right_open_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + +static int +Aligner_set_target_end_open_gap_score(Aligner* self, PyObject* value, + void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->target_left_open_gap_score = score; + self->target_right_open_gap_score = score; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_target_end_extend_gap_score__doc__[] = "target end extend gap score"; + +static PyObject* +Aligner_get_target_end_extend_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->target_left_extend_gap_score; + if (score != self->target_right_extend_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + +static int +Aligner_set_target_end_extend_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->target_left_extend_gap_score = score; + self->target_right_extend_gap_score = score; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_target_left_open_gap_score__doc__[] = "target left open score"; + +static PyObject* +Aligner_get_target_left_open_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + return PyFloat_FromDouble(self->target_left_open_gap_score); +} + +static int +Aligner_set_target_left_open_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->target_left_open_gap_score = score; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_target_left_extend_gap_score__doc__[] = "target left extend score"; + +static PyObject* 
+Aligner_get_target_left_extend_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + return PyFloat_FromDouble(self->target_left_extend_gap_score); +} + +static int +Aligner_set_target_left_extend_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->target_left_extend_gap_score = score; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_target_left_gap_score__doc__[] = "target left score"; + +static PyObject* +Aligner_get_target_left_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->target_left_open_gap_score; + if (score != self->target_left_extend_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + +static int +Aligner_set_target_left_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->target_left_open_gap_score = score; + self->target_left_extend_gap_score = score; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_target_right_gap_score_open__doc__[] = "target right open score"; + +static PyObject* +Aligner_get_target_right_open_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + return PyFloat_FromDouble(self->target_right_open_gap_score); +} + +static int +Aligner_set_target_right_open_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->target_right_open_gap_score = score; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_target_right_extend_gap_score__doc__[] = "target right extend score"; + +static PyObject* +Aligner_get_target_right_extend_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + return PyFloat_FromDouble(self->target_right_extend_gap_score); +} + +static int +Aligner_set_target_right_extend_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->target_right_extend_gap_score = score; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_target_right_gap_score__doc__[] = "target right score"; + +static PyObject* +Aligner_get_target_right_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->target_right_open_gap_score; + if (score != self->target_right_extend_gap_score) { + 
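+ /* open and extend scores disagree, so this combined property
+ * has no single value; callers must read the separate
+ * open/extend properties instead. */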
PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + +static int +Aligner_set_target_right_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->target_right_open_gap_score = score; + self->target_right_extend_gap_score = score; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_query_end_gap_score__doc__[] = "query end score"; + +static PyObject* +Aligner_get_query_end_gap_score(Aligner* self, void* closure) +{ if (self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->query_left_open_gap_score; + if (score != self->query_left_extend_gap_score + || score != self->query_right_open_gap_score + || score != self->query_right_extend_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + +static int +Aligner_set_query_end_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->query_left_open_gap_score = score; + self->query_left_extend_gap_score = score; + self->query_right_open_gap_score = score; + self->query_right_extend_gap_score = score; + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_query_end_open_gap_score__doc__[] = "query end open score"; + +static PyObject* +Aligner_get_query_end_open_gap_score(Aligner* self, void* closure) +{ if (self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->query_left_open_gap_score; + if (score != self->query_right_open_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + +static int +Aligner_set_query_end_open_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->query_left_open_gap_score = score; + self->query_right_open_gap_score = score; + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_query_end_extend_gap_score__doc__[] = "query end extend score"; + +static PyObject* +Aligner_get_query_end_extend_gap_score(Aligner* self, void* closure) +{ if (self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->query_left_extend_gap_score; + if (score != self->query_right_extend_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + +static int +Aligner_set_query_end_extend_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->query_left_extend_gap_score = score; + self->query_right_extend_gap_score = score; + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + 
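+ /* drop the gap-score callback: an explicit numeric score
+ * replaces any user-supplied gap function */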
self->query_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_query_internal_open_gap_score__doc__[] = "query internal open gap score"; + +static PyObject* +Aligner_get_query_internal_open_gap_score(Aligner* self, void* closure) +{ if (self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + return PyFloat_FromDouble(self->query_internal_open_gap_score); +} + +static int +Aligner_set_query_internal_open_gap_score(Aligner* self, PyObject* value, + void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->query_internal_open_gap_score = score; + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_query_internal_extend_gap_score__doc__[] = "query internal extend gap score"; + +static PyObject* +Aligner_get_query_internal_extend_gap_score(Aligner* self, void* closure) +{ if (self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + return PyFloat_FromDouble(self->query_internal_extend_gap_score); +} + +static int +Aligner_set_query_internal_extend_gap_score(Aligner* self, PyObject* value, + void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->query_internal_extend_gap_score = score; + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_query_internal_gap_score__doc__[] = "query internal gap score"; + +static PyObject* +Aligner_get_query_internal_gap_score(Aligner* self, void* closure) +{ if (self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->query_internal_open_gap_score; + if (score != self->query_internal_extend_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + +static int +Aligner_set_query_internal_gap_score(Aligner* self, PyObject* value, + void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->query_internal_open_gap_score = score; + self->query_internal_extend_gap_score = score; + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_query_left_open_gap_score__doc__[] = "query left open score"; + +static PyObject* +Aligner_get_query_left_open_gap_score(Aligner* self, void* closure) +{ if (self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + return PyFloat_FromDouble(self->query_left_open_gap_score); +} + +static int +Aligner_set_query_left_open_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->query_left_open_gap_score = score; + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_query_left_extend_gap_score__doc__[] = "query left extend score"; + +static PyObject* +Aligner_get_query_left_extend_gap_score(Aligner* self, void* closure) +{ if 
(self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + return PyFloat_FromDouble(self->query_left_extend_gap_score); +} + +static int +Aligner_set_query_left_extend_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->query_left_extend_gap_score = score; + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_query_left_gap_score__doc__[] = "query left score"; + +static PyObject* +Aligner_get_query_left_gap_score(Aligner* self, void* closure) +{ if (self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->query_left_open_gap_score; + if (score != self->query_left_extend_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + +static int +Aligner_set_query_left_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->query_left_open_gap_score = score; + self->query_left_extend_gap_score = score; + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_query_right_open_gap_score__doc__[] = "query right open score"; + +static PyObject* +Aligner_get_query_right_open_gap_score(Aligner* self, void* closure) +{ if (self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + return PyFloat_FromDouble(self->query_right_open_gap_score); +} + +static int +Aligner_set_query_right_open_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->query_right_open_gap_score = score; + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_query_right_extend_gap_score__doc__[] = "query right extend score"; + +static PyObject* +Aligner_get_query_right_extend_gap_score(Aligner* self, void* closure) +{ if (self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + return PyFloat_FromDouble(self->query_right_extend_gap_score); +} + +static int +Aligner_set_query_right_extend_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->query_right_extend_gap_score = score; + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_query_right_gap_score__doc__[] = "query right score"; + +static PyObject* +Aligner_get_query_right_gap_score(Aligner* self, void* closure) +{ if (self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->query_right_open_gap_score; + if (score != self->query_right_extend_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} 
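+
+/* Note on the gap-score properties above: each combined getter raises a
+ * ValueError when a gap-score function is in use or when its open and
+ * extend components disagree, and each setter writes all of its component
+ * scores, releases any gap-score callback, and resets the cached
+ * algorithm. A minimal usage sketch, assuming this module is exposed as
+ * Biopython's PairwiseAligner:
+ *
+ *     aligner.query_right_gap_score = -1.0
+ *     # same as setting both query_right_open_gap_score and
+ *     # query_right_extend_gap_score to -1.0
+ */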
+ +static int +Aligner_set_query_right_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->query_right_open_gap_score = score; + self->query_right_extend_gap_score = score; + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_epsilon__doc__[] = "roundoff epsilon"; + +static PyObject* +Aligner_get_epsilon(Aligner* self, void* closure) +{ return PyFloat_FromDouble(self->epsilon); +} + +static int +Aligner_set_epsilon(Aligner* self, PyObject* value, void* closure) +{ const double epsilon = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->epsilon = epsilon; + self->algorithm = Unknown; + return 0; +} + +static PyObject* +Aligner_get_wildcard(Aligner* self, void* closure) +{ + if (self->wildcard == -1) { + Py_INCREF(Py_None); + return Py_None; + } + else { + return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, &self->wildcard, 1); + } +} + +static int +Aligner_set_wildcard(Aligner* self, PyObject* value, void* closure) +{ + if (value == Py_None) { + self->wildcard = -1; + return 0; + } + if (!PyUnicode_Check(value)) { + PyErr_SetString(PyExc_TypeError, + "wildcard should be a single character, or None"); + return -1; + } + if (PyUnicode_READY(value) == -1) return -1; + if (PyUnicode_GET_LENGTH(value) != 1) { + PyErr_SetString(PyExc_ValueError, + "wildcard should be a single character, or None"); + return -1; + } + self->wildcard = PyUnicode_READ_CHAR(value, 0); + return 0; +} + +static char Aligner_wildcard__doc__[] = "wildcard character"; + +static Algorithm _get_algorithm(Aligner* self) +{ + Algorithm algorithm = self->algorithm; + if (algorithm == Unknown) { + const double target_gap_open = self->target_internal_open_gap_score; + const double query_gap_open = self->query_internal_open_gap_score; + const double target_gap_extend = self->target_internal_extend_gap_score; + const double query_gap_extend = self->query_internal_extend_gap_score; + const double target_left_open = self->target_left_open_gap_score; + const double target_left_extend = self->target_left_extend_gap_score; + const double query_left_open = self->query_left_open_gap_score; + const double target_right_open = self->target_right_open_gap_score; + const double query_right_open = self->query_right_open_gap_score; + const double target_right_extend = self->target_right_extend_gap_score; + const double query_left_extend = self->query_left_extend_gap_score; + const double query_right_extend = self->query_right_extend_gap_score; + if (self->target_gap_function || self->query_gap_function) + algorithm = WatermanSmithBeyer; + else if (target_gap_open == target_gap_extend + && query_gap_open == query_gap_extend + && target_left_open == target_left_extend + && target_right_open == target_right_extend + && query_left_open == query_left_extend + && query_right_open == query_right_extend) + algorithm = NeedlemanWunschSmithWaterman; + else + algorithm = Gotoh; + self->algorithm = algorithm; + } + return algorithm; +} + + +static char Aligner_algorithm__doc__[] = "alignment algorithm"; + +static PyObject* +Aligner_get_algorithm(Aligner* self, void* closure) +{ + const char* s = NULL; + const Mode mode = self->mode; + const Algorithm algorithm = _get_algorithm(self); + switch (algorithm) { + case NeedlemanWunschSmithWaterman: + switch (mode) { + case Global: + s = "Needleman-Wunsch"; + break; + case 
Local: + s = "Smith-Waterman"; + break; + } + break; + case Gotoh: + switch (mode) { + case Global: + s = "Gotoh global alignment algorithm"; + break; + case Local: + s = "Gotoh local alignment algorithm"; + break; + } + break; + case WatermanSmithBeyer: + switch (mode) { + case Global: + s = "Waterman-Smith-Beyer global alignment algorithm"; + break; + case Local: + s = "Waterman-Smith-Beyer local alignment algorithm"; + break; + } + break; + case Unknown: + default: + break; + } + return PyUnicode_FromString(s); +} + +static PyGetSetDef Aligner_getset[] = { + {"mode", + (getter)Aligner_get_mode, + (setter)Aligner_set_mode, + Aligner_mode__doc__, NULL}, + {"match_score", + (getter)Aligner_get_match_score, + (setter)Aligner_set_match_score, + Aligner_match_score__doc__, NULL}, + {"mismatch_score", + (getter)Aligner_get_mismatch_score, + (setter)Aligner_set_mismatch_score, + Aligner_mismatch_score__doc__, NULL}, + {"match", /* synonym for match_score */ + (getter)Aligner_get_match_score, + (setter)Aligner_set_match_score, + Aligner_match_score__doc__, NULL}, + {"mismatch", /* synonym for mismatch_score */ + (getter)Aligner_get_mismatch_score, + (setter)Aligner_set_mismatch_score, + Aligner_mismatch_score__doc__, NULL}, + {"substitution_matrix", + (getter)Aligner_get_substitution_matrix, + (setter)Aligner_set_substitution_matrix, + Aligner_substitution_matrix__doc__, NULL}, + {"alphabet", + (getter)Aligner_get_alphabet, + (setter)Aligner_set_alphabet, + Aligner_alphabet__doc__, NULL}, + {"gap_score", + (getter)Aligner_get_gap_score, + (setter)Aligner_set_gap_score, + Aligner_gap_score__doc__, NULL}, + {"open_gap_score", + (getter)Aligner_get_open_gap_score, + (setter)Aligner_set_open_gap_score, + Aligner_open_gap_score__doc__, NULL}, + {"extend_gap_score", + (getter)Aligner_get_extend_gap_score, + (setter)Aligner_set_extend_gap_score, + Aligner_extend_gap_score__doc__, NULL}, + {"internal_gap_score", + (getter)Aligner_get_internal_gap_score, + (setter)Aligner_set_internal_gap_score, + Aligner_internal_gap_score__doc__, NULL}, + {"internal_open_gap_score", + (getter)Aligner_get_internal_open_gap_score, + (setter)Aligner_set_internal_open_gap_score, + Aligner_internal_open_gap_score__doc__, NULL}, + {"internal_extend_gap_score", + (getter)Aligner_get_internal_extend_gap_score, + (setter)Aligner_set_internal_extend_gap_score, + Aligner_internal_extend_gap_score__doc__, NULL}, + {"end_gap_score", + (getter)Aligner_get_end_gap_score, + (setter)Aligner_set_end_gap_score, + Aligner_end_gap_score__doc__, NULL}, + {"end_open_gap_score", + (getter)Aligner_get_end_open_gap_score, + (setter)Aligner_set_end_open_gap_score, + Aligner_end_open_gap_score__doc__, NULL}, + {"end_extend_gap_score", + (getter)Aligner_get_end_extend_gap_score, + (setter)Aligner_set_end_extend_gap_score, + Aligner_end_extend_gap_score__doc__, NULL}, + {"left_gap_score", + (getter)Aligner_get_left_gap_score, + (setter)Aligner_set_left_gap_score, + Aligner_left_gap_score__doc__, NULL}, + {"left_open_gap_score", + (getter)Aligner_get_left_open_gap_score, + (setter)Aligner_set_left_open_gap_score, + Aligner_left_open_gap_score__doc__, NULL}, + {"left_extend_gap_score", + (getter)Aligner_get_left_extend_gap_score, + (setter)Aligner_set_left_extend_gap_score, + Aligner_left_extend_gap_score__doc__, NULL}, + {"right_gap_score", + (getter)Aligner_get_right_gap_score, + (setter)Aligner_set_right_gap_score, + Aligner_right_gap_score__doc__, NULL}, + {"right_open_gap_score", + (getter)Aligner_get_right_open_gap_score, + 
(setter)Aligner_set_right_open_gap_score, + Aligner_right_open_gap_score__doc__, NULL}, + {"right_extend_gap_score", + (getter)Aligner_get_right_extend_gap_score, + (setter)Aligner_set_right_extend_gap_score, + Aligner_right_extend_gap_score__doc__, NULL}, + {"target_open_gap_score", + (getter)Aligner_get_target_open_gap_score, + (setter)Aligner_set_target_open_gap_score, + Aligner_target_open_gap_score__doc__, NULL}, + {"target_extend_gap_score", + (getter)Aligner_get_target_extend_gap_score, + (setter)Aligner_set_target_extend_gap_score, + Aligner_target_extend_gap_score__doc__, NULL}, + {"target_gap_score", + (getter)Aligner_get_target_gap_score, + (setter)Aligner_set_target_gap_score, + Aligner_target_gap_score__doc__, NULL}, + {"query_open_gap_score", + (getter)Aligner_get_query_open_gap_score, + (setter)Aligner_set_query_open_gap_score, + Aligner_query_open_gap_score__doc__, NULL}, + {"query_extend_gap_score", + (getter)Aligner_get_query_extend_gap_score, + (setter)Aligner_set_query_extend_gap_score, + Aligner_query_extend_gap_score__doc__, NULL}, + {"query_gap_score", + (getter)Aligner_get_query_gap_score, + (setter)Aligner_set_query_gap_score, + Aligner_query_gap_score__doc__, NULL}, + {"target_end_gap_score", + (getter)Aligner_get_target_end_gap_score, + (setter)Aligner_set_target_end_gap_score, + Aligner_target_end_gap_score__doc__, NULL}, + {"target_end_open_gap_score", + (getter)Aligner_get_target_end_open_gap_score, + (setter)Aligner_set_target_end_open_gap_score, + Aligner_target_end_open_gap_score__doc__, NULL}, + {"target_end_extend_gap_score", + (getter)Aligner_get_target_end_extend_gap_score, + (setter)Aligner_set_target_end_extend_gap_score, + Aligner_target_end_extend_gap_score__doc__, NULL}, + {"target_internal_open_gap_score", + (getter)Aligner_get_target_internal_open_gap_score, + (setter)Aligner_set_target_internal_open_gap_score, + Aligner_target_internal_open_gap_score__doc__, NULL}, + {"target_internal_extend_gap_score", + (getter)Aligner_get_target_internal_extend_gap_score, + (setter)Aligner_set_target_internal_extend_gap_score, + Aligner_target_internal_extend_gap_score__doc__, NULL}, + {"target_internal_gap_score", + (getter)Aligner_get_target_internal_gap_score, + (setter)Aligner_set_target_internal_gap_score, + Aligner_target_internal_gap_score__doc__, NULL}, + {"target_left_open_gap_score", + (getter)Aligner_get_target_left_open_gap_score, + (setter)Aligner_set_target_left_open_gap_score, + Aligner_target_left_open_gap_score__doc__, NULL}, + {"target_left_extend_gap_score", + (getter)Aligner_get_target_left_extend_gap_score, + (setter)Aligner_set_target_left_extend_gap_score, + Aligner_target_left_extend_gap_score__doc__, NULL}, + {"target_left_gap_score", + (getter)Aligner_get_target_left_gap_score, + (setter)Aligner_set_target_left_gap_score, + Aligner_target_left_gap_score__doc__, NULL}, + {"target_right_open_gap_score", + (getter)Aligner_get_target_right_open_gap_score, + (setter)Aligner_set_target_right_open_gap_score, + Aligner_target_right_gap_score_open__doc__, NULL}, + {"target_right_extend_gap_score", + (getter)Aligner_get_target_right_extend_gap_score, + (setter)Aligner_set_target_right_extend_gap_score, + Aligner_target_right_extend_gap_score__doc__, NULL}, + {"target_right_gap_score", + (getter)Aligner_get_target_right_gap_score, + (setter)Aligner_set_target_right_gap_score, + Aligner_target_right_gap_score__doc__, NULL}, + {"query_end_gap_score", + (getter)Aligner_get_query_end_gap_score, + (setter)Aligner_set_query_end_gap_score, + 
Aligner_query_end_gap_score__doc__, NULL}, + {"query_end_open_gap_score", + (getter)Aligner_get_query_end_open_gap_score, + (setter)Aligner_set_query_end_open_gap_score, + Aligner_query_end_open_gap_score__doc__, NULL}, + {"query_end_extend_gap_score", + (getter)Aligner_get_query_end_extend_gap_score, + (setter)Aligner_set_query_end_extend_gap_score, + Aligner_query_end_extend_gap_score__doc__, NULL}, + {"query_internal_open_gap_score", + (getter)Aligner_get_query_internal_open_gap_score, + (setter)Aligner_set_query_internal_open_gap_score, + Aligner_query_internal_open_gap_score__doc__, NULL}, + {"query_internal_extend_gap_score", + (getter)Aligner_get_query_internal_extend_gap_score, + (setter)Aligner_set_query_internal_extend_gap_score, + Aligner_query_internal_extend_gap_score__doc__, NULL}, + {"query_internal_gap_score", + (getter)Aligner_get_query_internal_gap_score, + (setter)Aligner_set_query_internal_gap_score, + Aligner_query_internal_gap_score__doc__, NULL}, + {"query_left_open_gap_score", + (getter)Aligner_get_query_left_open_gap_score, + (setter)Aligner_set_query_left_open_gap_score, + Aligner_query_left_open_gap_score__doc__, NULL}, + {"query_left_extend_gap_score", + (getter)Aligner_get_query_left_extend_gap_score, + (setter)Aligner_set_query_left_extend_gap_score, + Aligner_query_left_extend_gap_score__doc__, NULL}, + {"query_left_gap_score", + (getter)Aligner_get_query_left_gap_score, + (setter)Aligner_set_query_left_gap_score, + Aligner_query_left_gap_score__doc__, NULL}, + {"query_right_open_gap_score", + (getter)Aligner_get_query_right_open_gap_score, + (setter)Aligner_set_query_right_open_gap_score, + Aligner_query_right_open_gap_score__doc__, NULL}, + {"query_right_extend_gap_score", + (getter)Aligner_get_query_right_extend_gap_score, + (setter)Aligner_set_query_right_extend_gap_score, + Aligner_query_right_extend_gap_score__doc__, NULL}, + {"query_right_gap_score", + (getter)Aligner_get_query_right_gap_score, + (setter)Aligner_set_query_right_gap_score, + Aligner_query_right_gap_score__doc__, NULL}, + {"epsilon", + (getter)Aligner_get_epsilon, + (setter)Aligner_set_epsilon, + Aligner_epsilon__doc__, NULL}, + {"wildcard", + (getter)Aligner_get_wildcard, + (setter)Aligner_set_wildcard, + Aligner_wildcard__doc__, NULL}, + {"algorithm", + (getter)Aligner_get_algorithm, + (setter)NULL, + Aligner_algorithm__doc__, NULL}, + {NULL} /* Sentinel */ +}; + +#define SELECT_SCORE_GLOBAL(score1, score2, score3) \ + score = score1; \ + temp = score2; \ + if (temp > score) score = temp; \ + temp = score3; \ + if (temp > score) score = temp; + +#define SELECT_SCORE_WATERMAN_SMITH_BEYER(score1, score2) \ + temp = score1 + gapscore; \ + if (temp > score) score = temp; \ + temp = score2 + gapscore; \ + if (temp > score) score = temp; + +#define SELECT_SCORE_GOTOH_LOCAL_ALIGN(score1, score2, score3, score4) \ + score = score1; \ + temp = score2; \ + if (temp > score) score = temp; \ + temp = score3; \ + if (temp > score) score = temp; \ + score += score4; \ + if (score < 0) score = 0; \ + else if (score > maximum) maximum = score; + +#define SELECT_SCORE_LOCAL3(score1, score2, score3) \ + score = score1; \ + temp = score2; \ + if (temp > score) score = temp; \ + temp = score3; \ + if (temp > score) score = temp; \ + if (score < 0) score = 0; \ + else if (score > maximum) maximum = score; + +#define SELECT_SCORE_LOCAL1(score1) \ + score = score1; \ + if (score < 0) score = 0; \ + else if (score > maximum) maximum = score; + +#define SELECT_TRACE_NEEDLEMAN_WUNSCH(hgap, vgap, align_score) \ 
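+ /* Choose the best of the diagonal step (temp + align_score) and the \
+ * horizontal/vertical gap steps; scores within epsilon of the best \
+ * are treated as ties, so more than one trace bit may be set. */ \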
+ score = temp + (align_score); \ + trace = DIAGONAL; \ + temp = row[j-1] + hgap; \ + if (temp > score + epsilon) { \ + score = temp; \ + trace = HORIZONTAL; \ + } \ + else if (temp > score - epsilon) trace |= HORIZONTAL; \ + temp = row[j] + vgap; \ + if (temp > score + epsilon) { \ + score = temp; \ + trace = VERTICAL; \ + } \ + else if (temp > score - epsilon) trace |= VERTICAL; \ + temp = row[j]; \ + row[j] = score; \ + M[i][j].trace = trace; + +#define SELECT_TRACE_SMITH_WATERMAN_HVD(align_score) \ + trace = DIAGONAL; \ + score = temp + (align_score); \ + temp = row[j-1] + gap_extend_A; \ + if (temp > score + epsilon) { \ + score = temp; \ + trace = HORIZONTAL; \ + } \ + else if (temp > score - epsilon) trace |= HORIZONTAL; \ + temp = row[j] + gap_extend_B; \ + if (temp > score + epsilon) { \ + score = temp; \ + trace = VERTICAL; \ + } \ + else if (temp > score - epsilon) trace |= VERTICAL; \ + if (score < epsilon) { \ + score = 0; \ + trace = STARTPOINT; \ + } \ + else if (trace & DIAGONAL && score > maximum - epsilon) { \ + if (score > maximum + epsilon) { \ + for ( ; im < i; im++, jm = 0) \ + for ( ; jm <= nB; jm++) M[im][jm].trace &= ~ENDPOINT; \ + for ( ; jm < j; jm++) M[im][jm].trace &= ~ENDPOINT; \ + im = i; \ + jm = j; \ + } \ + trace |= ENDPOINT; \ + } \ + M[i][j].trace = trace; \ + if (score > maximum) maximum = score; \ + temp = row[j]; \ + row[j] = score; + +#define SELECT_TRACE_SMITH_WATERMAN_D(align_score) \ + score = temp + (align_score); \ + trace = DIAGONAL; \ + if (score < epsilon) { \ + score = 0; \ + } \ + else if (trace & DIAGONAL && score > maximum - epsilon) { \ + if (score > maximum + epsilon) { \ + for ( ; im < i; im++, jm = 0) \ + for ( ; jm <= nB; jm++) M[im][jm].trace &= ~ENDPOINT; \ + for ( ; jm < j; jm++) M[im][jm].trace &= ~ENDPOINT; \ + im = i; \ + jm = j; \ + } \ + trace |= ENDPOINT; \ + } \ + M[i][j].trace = trace; \ + if (score > maximum) maximum = score; \ + temp = row[j]; \ + row[j] = score + +#define SELECT_TRACE_GOTOH_GLOBAL_GAP(matrix, score1, score2, score3) \ + trace = M_MATRIX; \ + score = score1; \ + temp = score2; \ + if (temp > score + epsilon) { \ + score = temp; \ + trace = Ix_MATRIX; \ + } \ + else if (temp > score - epsilon) trace |= Ix_MATRIX; \ + temp = score3; \ + if (temp > score + epsilon) { \ + score = temp; \ + trace = Iy_MATRIX; \ + } \ + else if (temp > score - epsilon) trace |= Iy_MATRIX; \ + gaps[i][j].matrix = trace; + +#define SELECT_TRACE_GOTOH_GLOBAL_ALIGN \ + trace = M_MATRIX; \ + score = M_temp; \ + temp = Ix_temp; \ + if (temp > score + epsilon) { \ + score = Ix_temp; \ + trace = Ix_MATRIX; \ + } \ + else if (temp > score - epsilon) trace |= Ix_MATRIX; \ + temp = Iy_temp; \ + if (temp > score + epsilon) { \ + score = temp; \ + trace = Iy_MATRIX; \ + } \ + else if (temp > score - epsilon) trace |= Iy_MATRIX; \ + M[i][j].trace = trace; + +#define SELECT_TRACE_GOTOH_LOCAL_ALIGN(align_score) \ + trace = M_MATRIX; \ + score = M_temp; \ + if (Ix_temp > score + epsilon) { \ + score = Ix_temp; \ + trace = Ix_MATRIX; \ + } \ + else if (Ix_temp > score - epsilon) trace |= Ix_MATRIX; \ + if (Iy_temp > score + epsilon) { \ + score = Iy_temp; \ + trace = Iy_MATRIX; \ + } \ + else if (Iy_temp > score - epsilon) trace |= Iy_MATRIX; \ + score += (align_score); \ + if (score < epsilon) { \ + score = 0; \ + trace = STARTPOINT; \ + } \ + else if (score > maximum - epsilon) { \ + if (score > maximum + epsilon) { \ + maximum = score; \ + for ( ; im < i; im++, jm = 0) \ + for ( ; jm <= nB; jm++) M[im][jm].trace &= ~ENDPOINT; \ + for ( ; jm 
< j; jm++) M[im][jm].trace &= ~ENDPOINT; \ + im = i; \ + jm = j; \ + } \ + trace |= ENDPOINT; \ + } \ + M[i][j].trace = trace; + +#define SELECT_TRACE_GOTOH_LOCAL_GAP(matrix, score1, score2, score3) \ + trace = M_MATRIX; \ + score = score1; \ + temp = score2; \ + if (temp > score + epsilon) { \ + score = temp; \ + trace = Ix_MATRIX; \ + } \ + else if (temp > score - epsilon) trace |= Ix_MATRIX; \ + temp = score3; \ + if (temp > score + epsilon) { \ + score = temp; \ + trace = Iy_MATRIX; \ + } \ + else if (temp > score - epsilon) trace |= Iy_MATRIX; \ + if (score < epsilon) { \ + score = -DBL_MAX; \ + trace = 0; \ + } \ + gaps[i][j].matrix = trace; + +#define SELECT_TRACE_WATERMAN_SMITH_BEYER_GLOBAL_ALIGN(score4) \ + trace = M_MATRIX; \ + score = M_row[i-1][j-1]; \ + temp = Ix_row[i-1][j-1]; \ + if (temp > score + epsilon) { \ + score = temp; \ + trace = Ix_MATRIX; \ + } \ + else if (temp > score - epsilon) trace |= Ix_MATRIX; \ + temp = Iy_row[i-1][j-1]; \ + if (temp > score + epsilon) { \ + score = temp; \ + trace = Iy_MATRIX; \ + } \ + else if (temp > score - epsilon) trace |= Iy_MATRIX; \ + M_row[i][j] = score + score4; \ + M[i][j].trace = trace; + +#define SELECT_TRACE_WATERMAN_SMITH_BEYER_GAP(score1, score2) \ + temp = score1 + gapscore; \ + if (temp > score - epsilon) { \ + if (temp > score + epsilon) { \ + score = temp; \ + nm = 0; \ + ng = 0; \ + } \ + gapM[nm] = gap; \ + nm++; \ + } \ + temp = score2 + gapscore; \ + if (temp > score - epsilon) { \ + if (temp > score + epsilon) { \ + score = temp; \ + nm = 0; \ + ng = 0; \ + } \ + gapXY[ng] = gap; \ + ng++; \ + } + +#define SELECT_TRACE_WATERMAN_SMITH_BEYER_ALIGN(score1, score2, score3, score4) \ + trace = M_MATRIX; \ + score = score1; \ + if (score2 > score + epsilon) { \ + score = score2; \ + trace = Ix_MATRIX; \ + } \ + else if (score2 > score - epsilon) trace |= Ix_MATRIX; \ + if (score3 > score + epsilon) { \ + score = score3; \ + trace = Iy_MATRIX; \ + } \ + else if (score3 > score - epsilon) trace |= Iy_MATRIX; \ + score += score4; \ + if (score < epsilon) { \ + score = 0; \ + trace = STARTPOINT; \ + } \ + else if (score > maximum - epsilon) { \ + if (score > maximum + epsilon) { \ + maximum = score; \ + for ( ; im < i; im++, jm = 0) \ + for ( ; jm <= nB; jm++) M[im][jm].trace &= ~ENDPOINT; \ + for ( ; jm < j; jm++) M[im][jm].trace &= ~ENDPOINT; \ + im = i; \ + jm = j; \ + } \ + trace |= ENDPOINT; \ + } \ + M_row[i][j] = score; \ + M[i][j].trace = trace; + +/* ----------------- alignment algorithms ----------------- */ + +#define NEEDLEMANWUNSCH_SCORE(align_score) \ + int i; \ + int j; \ + int kA; \ + int kB; \ + const double gap_extend_A = self->target_internal_extend_gap_score; \ + const double gap_extend_B = self->query_internal_extend_gap_score; \ + double score; \ + double temp; \ + double* row; \ + double left_gap_extend_A; \ + double right_gap_extend_A; \ + double left_gap_extend_B; \ + double right_gap_extend_B; \ + switch (strand) { \ + case '+': \ + left_gap_extend_A = self->target_left_extend_gap_score; \ + right_gap_extend_A = self->target_right_extend_gap_score; \ + left_gap_extend_B = self->query_left_extend_gap_score; \ + right_gap_extend_B = self->query_right_extend_gap_score; \ + break; \ + case '-': \ + left_gap_extend_A = self->target_right_extend_gap_score; \ + right_gap_extend_A = self->target_left_extend_gap_score; \ + left_gap_extend_B = self->query_right_extend_gap_score; \ + right_gap_extend_B = self->query_left_extend_gap_score; \ + break; \ + default: \ + PyErr_SetString(PyExc_RuntimeError, "strand 
was neither '+' nor '-'"); \ + return NULL; \ + } \ +\ + /* Needleman-Wunsch algorithm */ \ + row = PyMem_Malloc((nB+1)*sizeof(double)); \ + if (!row) return PyErr_NoMemory(); \ +\ + /* The top row of the score matrix is a special case, \ + * as there are no previously aligned characters. \ + */ \ + row[0] = 0.0; \ + for (j = 1; j <= nB; j++) row[j] = j * left_gap_extend_A; \ + for (i = 1; i < nA; i++) { \ + kA = sA[i-1]; \ + temp = row[0]; \ + row[0] = i * left_gap_extend_B; \ + for (j = 1; j < nB; j++) { \ + kB = sB[j-1]; \ + SELECT_SCORE_GLOBAL(temp + (align_score), \ + row[j] + gap_extend_B, \ + row[j-1] + gap_extend_A); \ + temp = row[j]; \ + row[j] = score; \ + } \ + kB = sB[nB-1]; \ + SELECT_SCORE_GLOBAL(temp + (align_score), \ + row[nB] + right_gap_extend_B, \ + row[nB-1] + gap_extend_A); \ + temp = row[nB]; \ + row[nB] = score; \ + } \ + kA = sA[nA-1]; \ + temp = row[0]; \ + row[0] = nA * left_gap_extend_B; \ + for (j = 1; j < nB; j++) { \ + kB = sB[j-1]; \ + SELECT_SCORE_GLOBAL(temp + (align_score), \ + row[j] + gap_extend_B, \ + row[j-1] + right_gap_extend_A); \ + temp = row[j]; \ + row[j] = score; \ + } \ + kB = sB[nB-1]; \ + SELECT_SCORE_GLOBAL(temp + (align_score), \ + row[nB] + right_gap_extend_B, \ + row[nB-1] + right_gap_extend_A); \ + PyMem_Free(row); \ + return PyFloat_FromDouble(score); + + +#define SMITHWATERMAN_SCORE(align_score) \ + int i; \ + int j; \ + int kA; \ + int kB; \ + const double gap_extend_A = self->target_internal_extend_gap_score; \ + const double gap_extend_B = self->query_internal_extend_gap_score; \ + double score; \ + double* row; \ + double temp; \ + double maximum = 0; \ +\ + /* Smith-Waterman algorithm */ \ + row = PyMem_Malloc((nB+1)*sizeof(double)); \ + if (!row) return PyErr_NoMemory(); \ +\ + /* The top row of the score matrix is a special case, \ + * as there are no previously aligned characters. 
\ + */ \ + for (j = 0; j <= nB; j++) \ + row[j] = 0; \ + for (i = 1; i < nA; i++) { \ + kA = sA[i-1]; \ + temp = 0; \ + for (j = 1; j < nB; j++) { \ + kB = sB[j-1]; \ + SELECT_SCORE_LOCAL3(temp + (align_score), \ + row[j] + gap_extend_B, \ + row[j-1] + gap_extend_A); \ + temp = row[j]; \ + row[j] = score; \ + } \ + kB = sB[nB-1]; \ + SELECT_SCORE_LOCAL1(temp + (align_score)); \ + temp = row[nB]; \ + row[nB] = score; \ + } \ + kA = sA[nA-1]; \ + temp = 0; \ + for (j = 1; j < nB; j++) { \ + kB = sB[j-1]; \ + SELECT_SCORE_LOCAL1(temp + (align_score)); \ + temp = row[j]; \ + row[j] = score; \ + } \ + kB = sB[nB-1]; \ + SELECT_SCORE_LOCAL1(temp + (align_score)); \ + PyMem_Free(row); \ + return PyFloat_FromDouble(maximum); + + +#define NEEDLEMANWUNSCH_ALIGN(align_score) \ + int i; \ + int j; \ + int kA; \ + int kB; \ + const double gap_extend_A = self->target_internal_extend_gap_score; \ + const double gap_extend_B = self->query_internal_extend_gap_score; \ + const double epsilon = self->epsilon; \ + Trace** M; \ + double score; \ + int trace; \ + double temp; \ + double* row = NULL; \ + PathGenerator* paths; \ + double left_gap_extend_A; \ + double right_gap_extend_A; \ + double left_gap_extend_B; \ + double right_gap_extend_B; \ + switch (strand) { \ + case '+': \ + left_gap_extend_A = self->target_left_extend_gap_score; \ + right_gap_extend_A = self->target_right_extend_gap_score; \ + left_gap_extend_B = self->query_left_extend_gap_score; \ + right_gap_extend_B = self->query_right_extend_gap_score; \ + break; \ + case '-': \ + left_gap_extend_A = self->target_right_extend_gap_score; \ + right_gap_extend_A = self->target_left_extend_gap_score; \ + left_gap_extend_B = self->query_right_extend_gap_score; \ + right_gap_extend_B = self->query_left_extend_gap_score; \ + break; \ + default: \ + PyErr_SetString(PyExc_RuntimeError, "strand was neither '+' nor '-'"); \ + return NULL; \ + } \ +\ + /* Needleman-Wunsch algorithm */ \ + paths = PathGenerator_create_NWSW(nA, nB, Global, strand); \ + if (!paths) return NULL; \ + row = PyMem_Malloc((nB+1)*sizeof(double)); \ + if (!row) { \ + Py_DECREF(paths); \ + return PyErr_NoMemory(); \ + } \ + M = paths->M; \ + row[0] = 0; \ + for (j = 1; j <= nB; j++) row[j] = j * left_gap_extend_A; \ + for (i = 1; i < nA; i++) { \ + temp = row[0]; \ + row[0] = i * left_gap_extend_B; \ + kA = sA[i-1]; \ + for (j = 1; j < nB; j++) { \ + kB = sB[j-1]; \ + SELECT_TRACE_NEEDLEMAN_WUNSCH(gap_extend_A, gap_extend_B, align_score); \ + } \ + kB = sB[j-1]; \ + SELECT_TRACE_NEEDLEMAN_WUNSCH(gap_extend_A, right_gap_extend_B, align_score); \ + } \ + temp = row[0]; \ + row[0] = i * left_gap_extend_B; \ + kA = sA[nA-1]; \ + for (j = 1; j < nB; j++) { \ + kB = sB[j-1]; \ + SELECT_TRACE_NEEDLEMAN_WUNSCH(right_gap_extend_A, gap_extend_B, align_score); \ + } \ + kB = sB[j-1]; \ + SELECT_TRACE_NEEDLEMAN_WUNSCH(right_gap_extend_A, right_gap_extend_B, align_score); \ + PyMem_Free(row); \ + M[nA][nB].path = 0; \ + return Py_BuildValue("fN", score, paths); + + +#define SMITHWATERMAN_ALIGN(align_score) \ + int i; \ + int j; \ + int im = nA; \ + int jm = nB; \ + int kA; \ + int kB; \ + const double gap_extend_A = self->target_internal_extend_gap_score; \ + const double gap_extend_B = self->query_internal_extend_gap_score; \ + const double epsilon = self->epsilon; \ + Trace** M = NULL; \ + double maximum = 0; \ + double score = 0; \ + double* row = NULL; \ + double temp; \ + int trace; \ + PathGenerator* paths = NULL; \ +\ + /* Smith-Waterman algorithm */ \ + paths = PathGenerator_create_NWSW(nA, 
nB, Local, strand); \ + if (!paths) return NULL; \ + row = PyMem_Malloc((nB+1)*sizeof(double)); \ + if (!row) { \ + Py_DECREF(paths); \ + return PyErr_NoMemory(); \ + } \ + M = paths->M; \ + for (j = 0; j <= nB; j++) row[j] = 0; \ + for (i = 1; i < nA; i++) { \ + temp = 0; \ + kA = sA[i-1]; \ + for (j = 1; j < nB; j++) { \ + kB = sB[j-1]; \ + SELECT_TRACE_SMITH_WATERMAN_HVD(align_score); \ + } \ + kB = sB[nB-1]; \ + SELECT_TRACE_SMITH_WATERMAN_D(align_score); \ + } \ + temp = 0; \ + kA = sA[nA-1]; \ + for (j = 1; j < nB; j++) { \ + kB = sB[j-1]; \ + SELECT_TRACE_SMITH_WATERMAN_D(align_score); \ + } \ + kB = sB[nB-1]; \ + SELECT_TRACE_SMITH_WATERMAN_D(align_score); \ + PyMem_Free(row); \ +\ + /* As we don't allow zero-score extensions to alignments, \ + * we need to remove all traces towards an ENDPOINT. \ + * In addition, some points then won't have any path to a STARTPOINT. \ + * Here, use path as a temporary variable to indicate if the point \ + * is reachable from a STARTPOINT. If it is unreachable, remove all \ + * traces from it, and don't allow it to be an ENDPOINT. It may still \ + * be a valid STARTPOINT. */ \ + for (j = 0; j <= nB; j++) M[0][j].path = 1; \ + for (i = 1; i <= nA; i++) { \ + M[i][0].path = 1; \ + for (j = 1; j <= nB; j++) { \ + trace = M[i][j].trace; \ + /* Remove traces to unreachable points. */ \ + if (!M[i-1][j-1].path) trace &= ~DIAGONAL; \ + if (!M[i][j-1].path) trace &= ~HORIZONTAL; \ + if (!M[i-1][j].path) trace &= ~VERTICAL; \ + if (trace & (STARTPOINT | HORIZONTAL | VERTICAL | DIAGONAL)) { \ + /* The point is reachable. */ \ + if (trace & ENDPOINT) M[i][j].path = 0; /* no extensions after ENDPOINT */ \ + else M[i][j].path = 1; \ + } \ + else { \ + /* The point is not reachable. Then it is not a STARTPOINT, \ + * all traces from it can be removed, and it cannot act as \ + * an ENDPOINT. 
*/ \ + M[i][j].path = 0; \ + trace = 0; \ + } \ + M[i][j].trace = trace; \ + } \ + } \ + if (maximum == 0) M[0][0].path = NONE; \ + else M[0][0].path = 0; \ + return Py_BuildValue("fN", maximum, paths); + + +#define GOTOH_GLOBAL_SCORE(align_score) \ + int i; \ + int j; \ + int kA; \ + int kB; \ + const double gap_open_A = self->target_internal_open_gap_score; \ + const double gap_open_B = self->query_internal_open_gap_score; \ + const double gap_extend_A = self->target_internal_extend_gap_score; \ + const double gap_extend_B = self->query_internal_extend_gap_score; \ + double left_gap_open_A; \ + double left_gap_open_B; \ + double left_gap_extend_A; \ + double left_gap_extend_B; \ + double right_gap_open_A; \ + double right_gap_open_B; \ + double right_gap_extend_A; \ + double right_gap_extend_B; \ + double* M_row = NULL; \ + double* Ix_row = NULL; \ + double* Iy_row = NULL; \ + double score; \ + double temp; \ + double M_temp; \ + double Ix_temp; \ + double Iy_temp; \ + switch (strand) { \ + case '+': \ + left_gap_open_A = self->target_left_open_gap_score; \ + left_gap_open_B = self->query_left_open_gap_score; \ + left_gap_extend_A = self->target_left_extend_gap_score; \ + left_gap_extend_B = self->query_left_extend_gap_score; \ + right_gap_open_A = self->target_right_open_gap_score; \ + right_gap_open_B = self->query_right_open_gap_score; \ + right_gap_extend_A = self->target_right_extend_gap_score; \ + right_gap_extend_B = self->query_right_extend_gap_score; \ + break; \ + case '-': \ + left_gap_open_A = self->target_right_open_gap_score; \ + left_gap_open_B = self->query_right_open_gap_score; \ + left_gap_extend_A = self->target_right_extend_gap_score; \ + left_gap_extend_B = self->query_right_extend_gap_score; \ + right_gap_open_A = self->target_left_open_gap_score; \ + right_gap_open_B = self->query_left_open_gap_score; \ + right_gap_extend_A = self->target_left_extend_gap_score; \ + right_gap_extend_B = self->query_left_extend_gap_score; \ + break; \ + default: \ + PyErr_SetString(PyExc_RuntimeError, "strand was neither '+' nor '-'"); \ + return NULL; \ + } \ +\ + /* Gotoh algorithm with three states */ \ + M_row = PyMem_Malloc((nB+1)*sizeof(double)); \ + if (!M_row) goto exit; \ + Ix_row = PyMem_Malloc((nB+1)*sizeof(double)); \ + if (!Ix_row) goto exit; \ + Iy_row = PyMem_Malloc((nB+1)*sizeof(double)); \ + if (!Iy_row) goto exit; \ +\ + /* The top row of the score matrix is a special case, \ + * as there are no previously aligned characters. 
\ + */ \ + M_row[0] = 0; \ + Ix_row[0] = -DBL_MAX; \ + Iy_row[0] = -DBL_MAX; \ + for (j = 1; j <= nB; j++) { \ + M_row[j] = -DBL_MAX; \ + Ix_row[j] = -DBL_MAX; \ + Iy_row[j] = left_gap_open_A + left_gap_extend_A * (j-1); \ + } \ +\ + for (i = 1; i < nA; i++) { \ + M_temp = M_row[0]; \ + Ix_temp = Ix_row[0]; \ + Iy_temp = Iy_row[0]; \ + M_row[0] = -DBL_MAX; \ + Ix_row[0] = left_gap_open_B + left_gap_extend_B * (i-1); \ + Iy_row[0] = -DBL_MAX; \ + kA = sA[i-1]; \ + for (j = 1; j < nB; j++) { \ + kB = sB[j-1]; \ + SELECT_SCORE_GLOBAL(M_temp, \ + Ix_temp, \ + Iy_temp); \ + M_temp = M_row[j]; \ + M_row[j] = score + (align_score); \ + SELECT_SCORE_GLOBAL(M_temp + gap_open_B, \ + Ix_row[j] + gap_extend_B, \ + Iy_row[j] + gap_open_B); \ + Ix_temp = Ix_row[j]; \ + Ix_row[j] = score; \ + SELECT_SCORE_GLOBAL(M_row[j-1] + gap_open_A, \ + Ix_row[j-1] + gap_open_A, \ + Iy_row[j-1] + gap_extend_A); \ + Iy_temp = Iy_row[j]; \ + Iy_row[j] = score; \ + } \ + kB = sB[nB-1]; \ + SELECT_SCORE_GLOBAL(M_temp, \ + Ix_temp, \ + Iy_temp); \ + M_temp = M_row[nB]; \ + M_row[nB] = score + (align_score); \ + SELECT_SCORE_GLOBAL(M_temp + right_gap_open_B, \ + Ix_row[nB] + right_gap_extend_B, \ + Iy_row[nB] + right_gap_open_B); \ + Ix_row[nB] = score; \ + SELECT_SCORE_GLOBAL(M_row[nB-1] + gap_open_A, \ + Iy_row[nB-1] + gap_extend_A, \ + Ix_row[nB-1] + gap_open_A); \ + Iy_row[nB] = score; \ + } \ +\ + M_temp = M_row[0]; \ + Ix_temp = Ix_row[0]; \ + Iy_temp = Iy_row[0]; \ + M_row[0] = -DBL_MAX; \ + Ix_row[0] = left_gap_open_B + left_gap_extend_B * (i-1); \ + Iy_row[0] = -DBL_MAX; \ + kA = sA[nA-1]; \ + for (j = 1; j < nB; j++) { \ + kB = sB[j-1]; \ + SELECT_SCORE_GLOBAL(M_temp, \ + Ix_temp, \ + Iy_temp); \ + M_temp = M_row[j]; \ + M_row[j] = score + (align_score); \ + SELECT_SCORE_GLOBAL(M_temp + gap_open_B, \ + Ix_row[j] + gap_extend_B, \ + Iy_row[j] + gap_open_B); \ + Ix_temp = Ix_row[j]; \ + Ix_row[j] = score; \ + SELECT_SCORE_GLOBAL(M_row[j-1] + right_gap_open_A, \ + Iy_row[j-1] + right_gap_extend_A, \ + Ix_row[j-1] + right_gap_open_A); \ + Iy_temp = Iy_row[j]; \ + Iy_row[j] = score; \ + } \ +\ + kB = sB[nB-1]; \ + SELECT_SCORE_GLOBAL(M_temp, \ + Ix_temp, \ + Iy_temp); \ + M_temp = M_row[nB]; \ + M_row[nB] = score + (align_score); \ + SELECT_SCORE_GLOBAL(M_temp + right_gap_open_B, \ + Ix_row[nB] + right_gap_extend_B, \ + Iy_row[nB] + right_gap_open_B); \ + Ix_temp = Ix_row[nB]; \ + Ix_row[nB] = score; \ + SELECT_SCORE_GLOBAL(M_row[nB-1] + right_gap_open_A, \ + Ix_row[nB-1] + right_gap_open_A, \ + Iy_row[nB-1] + right_gap_extend_A); \ + Iy_temp = Iy_row[nB]; \ + Iy_row[nB] = score; \ +\ + SELECT_SCORE_GLOBAL(M_row[nB], Ix_row[nB], Iy_row[nB]); \ + PyMem_Free(M_row); \ + PyMem_Free(Ix_row); \ + PyMem_Free(Iy_row); \ + return PyFloat_FromDouble(score); \ +\ +exit: \ + if (M_row) PyMem_Free(M_row); \ + if (Ix_row) PyMem_Free(Ix_row); \ + if (Iy_row) PyMem_Free(Iy_row); \ + return PyErr_NoMemory(); \ + + +#define GOTOH_LOCAL_SCORE(align_score) \ + int i; \ + int j; \ + int kA; \ + int kB; \ + const double gap_open_A = self->target_internal_open_gap_score; \ + const double gap_open_B = self->query_internal_open_gap_score; \ + const double gap_extend_A = self->target_internal_extend_gap_score; \ + const double gap_extend_B = self->query_internal_extend_gap_score; \ + double* M_row = NULL; \ + double* Ix_row = NULL; \ + double* Iy_row = NULL; \ + double score; \ + double temp; \ + double M_temp; \ + double Ix_temp; \ + double Iy_temp; \ + double maximum = 0.0; \ +\ + /* Gotoh algorithm with three states */ \ + M_row = 
PyMem_Malloc((nB+1)*sizeof(double)); \ + if (!M_row) goto exit; \ + Ix_row = PyMem_Malloc((nB+1)*sizeof(double)); \ + if (!Ix_row) goto exit; \ + Iy_row = PyMem_Malloc((nB+1)*sizeof(double)); \ + if (!Iy_row) goto exit; \ + \ + /* The top row of the score matrix is a special case, \ + * as there are no previously aligned characters. \ + */ \ + M_row[0] = 0; \ + Ix_row[0] = -DBL_MAX; \ + Iy_row[0] = -DBL_MAX; \ + for (j = 1; j <= nB; j++) { \ + M_row[j] = -DBL_MAX; \ + Ix_row[j] = -DBL_MAX; \ + Iy_row[j] = 0; \ + } \ + for (i = 1; i < nA; i++) { \ + M_temp = M_row[0]; \ + Ix_temp = Ix_row[0]; \ + Iy_temp = Iy_row[0]; \ + M_row[0] = -DBL_MAX; \ + Ix_row[0] = 0; \ + Iy_row[0] = -DBL_MAX; \ + kA = sA[i-1]; \ + for (j = 1; j < nB; j++) { \ + kB = sB[j-1]; \ + SELECT_SCORE_GOTOH_LOCAL_ALIGN(M_temp, \ + Ix_temp, \ + Iy_temp, \ + (align_score)); \ + M_temp = M_row[j]; \ + M_row[j] = score; \ + SELECT_SCORE_LOCAL3(M_temp + gap_open_B, \ + Ix_row[j] + gap_extend_B, \ + Iy_row[j] + gap_open_B); \ + Ix_temp = Ix_row[j]; \ + Ix_row[j] = score; \ + SELECT_SCORE_LOCAL3(M_row[j-1] + gap_open_A, \ + Ix_row[j-1] + gap_open_A, \ + Iy_row[j-1] + gap_extend_A); \ + Iy_temp = Iy_row[j]; \ + Iy_row[j] = score; \ + } \ + kB = sB[nB-1]; \ + Ix_row[nB] = 0; \ + Iy_row[nB] = 0; \ + SELECT_SCORE_GOTOH_LOCAL_ALIGN(M_temp, \ + Ix_temp, \ + Iy_temp, \ + (align_score)); \ + M_temp = M_row[nB]; \ + M_row[nB] = score; \ + } \ + M_temp = M_row[0]; \ + Ix_temp = Ix_row[0]; \ + Iy_temp = Iy_row[0]; \ + M_row[0] = -DBL_MAX; \ + Ix_row[0] = 0; \ + Iy_row[0] = -DBL_MAX; \ + kA = sA[nA-1]; \ + for (j = 1; j < nB; j++) { \ + kB = sB[j-1]; \ + SELECT_SCORE_GOTOH_LOCAL_ALIGN(M_temp, \ + Ix_temp, \ + Iy_temp, \ + (align_score)); \ + M_temp = M_row[j]; \ + M_row[j] = score; \ + Ix_temp = Ix_row[j]; \ + Iy_temp = Iy_row[j]; \ + Ix_row[j] = 0; \ + Iy_row[j] = 0; \ + } \ + kB = sB[nB-1]; \ + SELECT_SCORE_GOTOH_LOCAL_ALIGN(M_temp, \ + Ix_temp, \ + Iy_temp, \ + (align_score)); \ + PyMem_Free(M_row); \ + PyMem_Free(Ix_row); \ + PyMem_Free(Iy_row); \ + return PyFloat_FromDouble(maximum); \ +exit: \ + if (M_row) PyMem_Free(M_row); \ + if (Ix_row) PyMem_Free(Ix_row); \ + if (Iy_row) PyMem_Free(Iy_row); \ + return PyErr_NoMemory(); \ + + +#define GOTOH_GLOBAL_ALIGN(align_score) \ + int i; \ + int j; \ + int kA; \ + int kB; \ + const double gap_open_A = self->target_internal_open_gap_score; \ + const double gap_open_B = self->query_internal_open_gap_score; \ + const double gap_extend_A = self->target_internal_extend_gap_score; \ + const double gap_extend_B = self->query_internal_extend_gap_score; \ + double left_gap_open_A; \ + double left_gap_open_B; \ + double left_gap_extend_A; \ + double left_gap_extend_B; \ + double right_gap_open_A; \ + double right_gap_open_B; \ + double right_gap_extend_A; \ + double right_gap_extend_B; \ + const double epsilon = self->epsilon; \ + TraceGapsGotoh** gaps = NULL; \ + Trace** M = NULL; \ + double* M_row = NULL; \ + double* Ix_row = NULL; \ + double* Iy_row = NULL; \ + double score; \ + int trace; \ + double temp; \ + double M_temp; \ + double Ix_temp; \ + double Iy_temp; \ + PathGenerator* paths; \ + switch (strand) { \ + case '+': \ + left_gap_open_A = self->target_left_open_gap_score; \ + left_gap_open_B = self->query_left_open_gap_score; \ + left_gap_extend_A = self->target_left_extend_gap_score; \ + left_gap_extend_B = self->query_left_extend_gap_score; \ + right_gap_open_A = self->target_right_open_gap_score; \ + right_gap_open_B = self->query_right_open_gap_score; \ + right_gap_extend_A = 
self->target_right_extend_gap_score; \ + right_gap_extend_B = self->query_right_extend_gap_score; \ + break; \ + case '-': \ + left_gap_open_A = self->target_right_open_gap_score; \ + left_gap_open_B = self->query_right_open_gap_score; \ + left_gap_extend_A = self->target_right_extend_gap_score; \ + left_gap_extend_B = self->query_right_extend_gap_score; \ + right_gap_open_A = self->target_left_open_gap_score; \ + right_gap_open_B = self->query_left_open_gap_score; \ + right_gap_extend_A = self->target_left_extend_gap_score; \ + right_gap_extend_B = self->query_left_extend_gap_score; \ + break; \ + default: \ + PyErr_SetString(PyExc_RuntimeError, "strand was neither '+' nor '-'"); \ + return NULL; \ + } \ +\ + /* Gotoh algorithm with three states */ \ + paths = PathGenerator_create_Gotoh(nA, nB, Global, strand); \ + if (!paths) return NULL; \ + M_row = PyMem_Malloc((nB+1)*sizeof(double)); \ + if (!M_row) goto exit; \ + Ix_row = PyMem_Malloc((nB+1)*sizeof(double)); \ + if (!Ix_row) goto exit; \ + Iy_row = PyMem_Malloc((nB+1)*sizeof(double)); \ + if (!Iy_row) goto exit; \ + M = paths->M; \ + gaps = paths->gaps.gotoh; \ + \ + /* Gotoh algorithm with three states */ \ + M_row[0] = 0; \ + Ix_row[0] = -DBL_MAX; \ + Iy_row[0] = -DBL_MAX; \ + for (j = 1; j <= nB; j++) { \ + M_row[j] = -DBL_MAX; \ + Ix_row[j] = -DBL_MAX; \ + Iy_row[j] = left_gap_open_A + left_gap_extend_A * (j-1); \ + } \ + for (i = 1; i < nA; i++) { \ + kA = sA[i-1]; \ + M_temp = M_row[0]; \ + Ix_temp = Ix_row[0]; \ + Iy_temp = Iy_row[0]; \ + M_row[0] = -DBL_MAX; \ + Ix_row[0] = left_gap_open_B + left_gap_extend_B * (i-1); \ + Iy_row[0] = -DBL_MAX; \ + for (j = 1; j < nB; j++) { \ + kB = sB[j-1]; \ + SELECT_TRACE_GOTOH_GLOBAL_ALIGN; \ + M_temp = M_row[j]; \ + M_row[j] = score + (align_score); \ + SELECT_TRACE_GOTOH_GLOBAL_GAP(Ix, \ + M_temp + gap_open_B, \ + Ix_row[j] + gap_extend_B, \ + Iy_row[j] + gap_open_B); \ + Ix_temp = Ix_row[j]; \ + Ix_row[j] = score; \ + SELECT_TRACE_GOTOH_GLOBAL_GAP(Iy, \ + M_row[j-1] + gap_open_A, \ + Ix_row[j-1] + gap_open_A, \ + Iy_row[j-1] + gap_extend_A); \ + Iy_temp = Iy_row[j]; \ + Iy_row[j] = score; \ + } \ + kB = sB[nB-1]; \ + SELECT_TRACE_GOTOH_GLOBAL_ALIGN; \ + M_temp = M_row[nB]; \ + M_row[nB] = score + (align_score); \ + SELECT_TRACE_GOTOH_GLOBAL_GAP(Ix, \ + M_temp + right_gap_open_B, \ + Ix_row[nB] + right_gap_extend_B, \ + Iy_row[nB] + right_gap_open_B); \ + Ix_temp = Ix_row[nB]; \ + Ix_row[nB] = score; \ + SELECT_TRACE_GOTOH_GLOBAL_GAP(Iy, \ + M_row[nB-1] + gap_open_A, \ + Ix_row[nB-1] + gap_open_A, \ + Iy_row[nB-1] + gap_extend_A); \ + Iy_temp = Iy_row[nB]; \ + Iy_row[nB] = score; \ + } \ + kA = sA[nA-1]; \ + M_temp = M_row[0]; \ + Ix_temp = Ix_row[0]; \ + Iy_temp = Iy_row[0]; \ + M_row[0] = -DBL_MAX; \ + Ix_row[0] = left_gap_open_B + left_gap_extend_B * (nA-1); \ + Iy_row[0] = -DBL_MAX; \ + for (j = 1; j < nB; j++) { \ + kB = sB[j-1]; \ + SELECT_TRACE_GOTOH_GLOBAL_ALIGN; \ + M_temp = M_row[j]; \ + M_row[j] = score + (align_score); \ + SELECT_TRACE_GOTOH_GLOBAL_GAP(Ix, \ + M_temp + gap_open_B, \ + Ix_row[j] + gap_extend_B, \ + Iy_row[j] + gap_open_B); \ + Ix_temp = Ix_row[j]; \ + Ix_row[j] = score; \ + SELECT_TRACE_GOTOH_GLOBAL_GAP(Iy, \ + M_row[j-1] + right_gap_open_A, \ + Ix_row[j-1] + right_gap_open_A, \ + Iy_row[j-1] + right_gap_extend_A); \ + Iy_temp = Iy_row[j]; \ + Iy_row[j] = score; \ + } \ + kB = sB[nB-1]; \ + SELECT_TRACE_GOTOH_GLOBAL_ALIGN; \ + M_temp = M_row[j]; \ + M_row[j] = score + (align_score); \ + SELECT_TRACE_GOTOH_GLOBAL_GAP(Ix, \ + M_temp + right_gap_open_B, \ + 
Ix_row[j] + right_gap_extend_B, \ + Iy_row[j] + right_gap_open_B); \ + Ix_row[nB] = score; \ + SELECT_TRACE_GOTOH_GLOBAL_GAP(Iy, \ + M_row[j-1] + right_gap_open_A, \ + Ix_row[j-1] + right_gap_open_A, \ + Iy_row[j-1] + right_gap_extend_A); \ + Iy_row[nB] = score; \ + M[nA][nB].path = 0; \ + \ + /* traceback */ \ + SELECT_SCORE_GLOBAL(M_row[nB], Ix_row[nB], Iy_row[nB]); \ + if (M_row[nB] < score - epsilon) M[nA][nB].trace = 0; \ + if (Ix_row[nB] < score - epsilon) gaps[nA][nB].Ix = 0; \ + if (Iy_row[nB] < score - epsilon) gaps[nA][nB].Iy = 0; \ + return Py_BuildValue("fN", score, paths); \ +exit: \ + Py_DECREF(paths); \ + if (M_row) PyMem_Free(M_row); \ + if (Ix_row) PyMem_Free(Ix_row); \ + if (Iy_row) PyMem_Free(Iy_row); \ + return PyErr_NoMemory(); \ + + +#define GOTOH_LOCAL_ALIGN(align_score) \ + int i; \ + int j; \ + int im = nA; \ + int jm = nB; \ + int kA; \ + int kB; \ + const double gap_open_A = self->target_internal_open_gap_score; \ + const double gap_open_B = self->query_internal_open_gap_score; \ + const double gap_extend_A = self->target_internal_extend_gap_score; \ + const double gap_extend_B = self->query_internal_extend_gap_score; \ + const double epsilon = self->epsilon; \ + Trace** M = NULL; \ + TraceGapsGotoh** gaps = NULL; \ + double* M_row = NULL; \ + double* Ix_row = NULL; \ + double* Iy_row = NULL; \ + double score; \ + int trace; \ + double temp; \ + double M_temp; \ + double Ix_temp; \ + double Iy_temp; \ + double maximum = 0.0; \ + PathGenerator* paths; \ + \ + /* Gotoh algorithm with three states */ \ + paths = PathGenerator_create_Gotoh(nA, nB, Local, strand); \ + if (!paths) return NULL; \ + M = paths->M; \ + gaps = paths->gaps.gotoh; \ + M_row = PyMem_Malloc((nB+1)*sizeof(double)); \ + if (!M_row) goto exit; \ + Ix_row = PyMem_Malloc((nB+1)*sizeof(double)); \ + if (!Ix_row) goto exit; \ + Iy_row = PyMem_Malloc((nB+1)*sizeof(double)); \ + if (!Iy_row) goto exit; \ + M_row[0] = 0; \ + Ix_row[0] = -DBL_MAX; \ + Iy_row[0] = -DBL_MAX; \ + for (j = 1; j <= nB; j++) { \ + M_row[j] = 0; \ + Ix_row[j] = -DBL_MAX; \ + Iy_row[j] = -DBL_MAX; \ + } \ + for (i = 1; i < nA; i++) { \ + M_temp = M_row[0]; \ + Ix_temp = Ix_row[0]; \ + Iy_temp = Iy_row[0]; \ + M_row[0] = 0; \ + Ix_row[0] = -DBL_MAX; \ + Iy_row[0] = -DBL_MAX; \ + kA = sA[i-1]; \ + for (j = 1; j < nB; j++) { \ + kB = sB[j-1]; \ + SELECT_TRACE_GOTOH_LOCAL_ALIGN(align_score) \ + M_temp = M_row[j]; \ + M_row[j] = score; \ + SELECT_TRACE_GOTOH_LOCAL_GAP(Ix, \ + M_temp + gap_open_B, \ + Ix_row[j] + gap_extend_B, \ + Iy_row[j] + gap_open_B); \ + Ix_temp = Ix_row[j]; \ + Ix_row[j] = score; \ + SELECT_TRACE_GOTOH_LOCAL_GAP(Iy, \ + M_row[j-1] + gap_open_A, \ + Ix_row[j-1] + gap_open_A, \ + Iy_row[j-1] + gap_extend_A); \ + Iy_temp = Iy_row[j]; \ + Iy_row[j] = score; \ + } \ + kB = sB[nB-1]; \ + SELECT_TRACE_GOTOH_LOCAL_ALIGN(align_score) \ + M_temp = M_row[j]; \ + M_row[j] = score; \ + Ix_temp = Ix_row[nB]; \ + Ix_row[nB] = 0; \ + gaps[i][nB].Ix = 0; \ + Iy_temp = Iy_row[nB]; \ + Iy_row[nB] = 0; \ + gaps[i][nB].Iy = 0; \ + } \ + M_temp = M_row[0]; \ + M_row[0] = 0; \ + M[nA][0].trace = 0; \ + Ix_temp = Ix_row[0]; \ + Ix_row[0] = -DBL_MAX; \ + gaps[nA][0].Ix = 0; \ + gaps[nA][0].Iy = 0; \ + Iy_temp = Iy_row[0]; \ + Iy_row[0] = -DBL_MAX; \ + kA = sA[nA-1]; \ + for (j = 1; j < nB; j++) { \ + kB = sB[j-1]; \ + SELECT_TRACE_GOTOH_LOCAL_ALIGN(align_score) \ + M_temp = M_row[j]; \ + M_row[j] = score; \ + Ix_temp = Ix_row[j]; \ + Ix_row[j] = 0; \ + gaps[nA][j].Ix = 0; \ + Iy_temp = Iy_row[j]; \ + Iy_row[j] = 0; \ + gaps[nA][j].Iy = 
0; \ + } \ + kB = sB[nB-1]; \ + SELECT_TRACE_GOTOH_LOCAL_ALIGN(align_score) \ + gaps[nA][nB].Ix = 0; \ + gaps[nA][nB].Iy = 0; \ +\ + PyMem_Free(M_row); \ + PyMem_Free(Ix_row); \ + PyMem_Free(Iy_row); \ +\ + /* As we don't allow zero-score extensions to alignments, \ + * we need to remove all traces towards an ENDPOINT. \ + * In addition, some points then won't have any path to a STARTPOINT. \ + * Here, use path as a temporary variable to indicate if the point \ + * is reachable from a STARTPOINT. If it is unreachable, remove all \ + * traces from it, and don't allow it to be an ENDPOINT. It may still \ + * be a valid STARTPOINT. */ \ + for (j = 0; j <= nB; j++) M[0][j].path = M_MATRIX; \ + for (i = 1; i <= nA; i++) { \ + M[i][0].path = M_MATRIX; \ + for (j = 1; j <= nB; j++) { \ + /* Remove traces to unreachable points. */ \ + trace = M[i][j].trace; \ + if (!(M[i-1][j-1].path & M_MATRIX)) trace &= ~M_MATRIX; \ + if (!(M[i-1][j-1].path & Ix_MATRIX)) trace &= ~Ix_MATRIX; \ + if (!(M[i-1][j-1].path & Iy_MATRIX)) trace &= ~Iy_MATRIX; \ + if (trace & (STARTPOINT | M_MATRIX | Ix_MATRIX | Iy_MATRIX)) { \ + /* The point is reachable. */ \ + if (trace & ENDPOINT) M[i][j].path = 0; /* no extensions after ENDPOINT */ \ + else M[i][j].path |= M_MATRIX; \ + } \ + else { \ + /* The point is not reachable. Then it is not a STARTPOINT, \ + * all traces from it can be removed, and it cannot act as \ + * an ENDPOINT. */ \ + M[i][j].path &= ~M_MATRIX; \ + trace = 0; \ + } \ + M[i][j].trace = trace; \ + trace = gaps[i][j].Ix; \ + if (!(M[i-1][j].path & M_MATRIX)) trace &= ~M_MATRIX; \ + if (!(M[i-1][j].path & Ix_MATRIX)) trace &= ~Ix_MATRIX; \ + if (!(M[i-1][j].path & Iy_MATRIX)) trace &= ~Iy_MATRIX; \ + if (trace & (M_MATRIX | Ix_MATRIX | Iy_MATRIX)) { \ + /* The point is reachable. */ \ + M[i][j].path |= Ix_MATRIX; \ + } \ + else { \ + /* The point is not reachable. Then \ + * all traces from it can be removed. */ \ + M[i][j].path &= ~Ix_MATRIX; \ + trace = 0; \ + } \ + gaps[i][j].Ix = trace; \ + trace = gaps[i][j].Iy; \ + if (!(M[i][j-1].path & M_MATRIX)) trace &= ~M_MATRIX; \ + if (!(M[i][j-1].path & Ix_MATRIX)) trace &= ~Ix_MATRIX; \ + if (!(M[i][j-1].path & Iy_MATRIX)) trace &= ~Iy_MATRIX; \ + if (trace & (M_MATRIX | Ix_MATRIX | Iy_MATRIX)) { \ + /* The point is reachable. */ \ + M[i][j].path |= Iy_MATRIX; \ + } \ + else { \ + /* The point is not reachable. Then \ + * all traces from it can be removed. 
*/ \ + M[i][j].path &= ~Iy_MATRIX; \ + trace = 0; \ + } \ + gaps[i][j].Iy = trace; \ + } \ + } \ +\ + /* traceback */ \ + if (maximum == 0) M[0][0].path = DONE; \ + else M[0][0].path = 0; \ + return Py_BuildValue("fN", maximum, paths); \ +\ +exit: \ + Py_DECREF(paths); \ + if (M_row) PyMem_Free(M_row); \ + if (Ix_row) PyMem_Free(Ix_row); \ + if (Iy_row) PyMem_Free(Iy_row); \ + return PyErr_NoMemory(); \ + + +#define WATERMANSMITHBEYER_ENTER_SCORE \ + int i; \ + int j = 0; \ + int k; \ + int kA; \ + int kB; \ + double** M = NULL; \ + double** Ix = NULL; \ + double** Iy = NULL; \ + double score = 0.0; \ + double gapscore = 0.0; \ + double temp; \ + int ok = 1; \ + PyObject* result = NULL; \ +\ + /* Waterman-Smith-Beyer algorithm */ \ + M = PyMem_Malloc((nA+1)*sizeof(double*)); \ + if (!M) goto exit; \ + Ix = PyMem_Malloc((nA+1)*sizeof(double*)); \ + if (!Ix) goto exit; \ + Iy = PyMem_Malloc((nA+1)*sizeof(double*)); \ + if (!Iy) goto exit; \ + for (i = 0; i <= nA; i++) { \ + M[i] = PyMem_Malloc((nB+1)*sizeof(double)); \ + if (!M[i]) goto exit; \ + Ix[i] = PyMem_Malloc((nB+1)*sizeof(double)); \ + if (!Ix[i]) goto exit; \ + Iy[i] = PyMem_Malloc((nB+1)*sizeof(double)); \ + if (!Iy[i]) goto exit; \ + } \ + + +#define WATERMANSMITHBEYER_GLOBAL_SCORE(align_score, query_gap_start) \ + /* The top row of the score matrix is a special case, \ + * as there are no previously aligned characters. \ + */ \ + M[0][0] = 0; \ + Ix[0][0] = -DBL_MAX; \ + Iy[0][0] = -DBL_MAX; \ + for (i = 1; i <= nA; i++) { \ + M[i][0] = -DBL_MAX; \ + Iy[i][0] = -DBL_MAX; \ + ok = _call_query_gap_function(self, query_gap_start, i, &score); \ + if (!ok) goto exit; \ + Ix[i][0] = score; \ + } \ + for (j = 1; j <= nB; j++) { \ + M[0][j] = -DBL_MAX; \ + Ix[0][j] = -DBL_MAX; \ + ok = _call_target_gap_function(self, 0, j, &score); \ + if (!ok) goto exit; \ + Iy[0][j] = score; \ + } \ + for (i = 1; i <= nA; i++) { \ + kA = sA[i-1]; \ + for (j = 1; j <= nB; j++) { \ + kB = sB[j-1]; \ + SELECT_SCORE_GLOBAL(M[i-1][j-1], Ix[i-1][j-1], Iy[i-1][j-1]); \ + M[i][j] = score + (align_score); \ + score = -DBL_MAX; \ + for (k = 1; k <= i; k++) { \ + ok = _call_query_gap_function(self, query_gap_start, k, &gapscore); \ + if (!ok) goto exit; \ + SELECT_SCORE_WATERMAN_SMITH_BEYER(M[i-k][j], Iy[i-k][j]); \ + } \ + Ix[i][j] = score; \ + score = -DBL_MAX; \ + for (k = 1; k <= j; k++) { \ + ok = _call_target_gap_function(self, i, k, &gapscore); \ + if (!ok) goto exit; \ + SELECT_SCORE_WATERMAN_SMITH_BEYER(M[i][j-k], Ix[i][j-k]); \ + } \ + Iy[i][j] = score; \ + } \ + } \ + SELECT_SCORE_GLOBAL(M[nA][nB], Ix[nA][nB], Iy[nA][nB]); \ +\ + result = PyFloat_FromDouble(score); \ + + +#define WATERMANSMITHBEYER_LOCAL_SCORE(align_score, query_gap_start) \ + /* The top row of the score matrix is a special case, \ + * as there are no previously aligned characters. 
\
+     */ \
+    M[0][0] = 0; \
+    Ix[0][0] = -DBL_MAX; \
+    Iy[0][0] = -DBL_MAX; \
+    for (i = 1; i <= nA; i++) { \
+        M[i][0] = -DBL_MAX; \
+        Ix[i][0] = 0; \
+        Iy[i][0] = -DBL_MAX; \
+    } \
+    for (j = 1; j <= nB; j++) { \
+        M[0][j] = -DBL_MAX; \
+        Ix[0][j] = -DBL_MAX; \
+        Iy[0][j] = 0; \
+    } \
+    for (i = 1; i <= nA; i++) { \
+        kA = sA[i-1]; \
+        for (j = 1; j <= nB; j++) { \
+            kB = sB[j-1]; \
+            SELECT_SCORE_GOTOH_LOCAL_ALIGN(M[i-1][j-1], \
+                                           Ix[i-1][j-1], \
+                                           Iy[i-1][j-1], \
+                                           (align_score)); \
+            M[i][j] = score; \
+            if (i == nA || j == nB) { \
+                Ix[i][j] = 0; \
+                Iy[i][j] = 0; \
+                continue; \
+            } \
+            score = 0.0; \
+            for (k = 1; k <= i; k++) { \
+                ok = _call_query_gap_function(self, query_gap_start, k, &gapscore); \
+                if (!ok) goto exit; \
+                SELECT_SCORE_WATERMAN_SMITH_BEYER(M[i-k][j], Iy[i-k][j]); \
+            } \
+            if (score > maximum) maximum = score; \
+            Ix[i][j] = score; \
+            score = 0.0; \
+            for (k = 1; k <= j; k++) { \
+                ok = _call_target_gap_function(self, i, k, &gapscore); \
+                if (!ok) goto exit; \
+                SELECT_SCORE_WATERMAN_SMITH_BEYER(M[i][j-k], Ix[i][j-k]); \
+            } \
+            if (score > maximum) maximum = score; \
+            Iy[i][j] = score; \
+        } \
+    } \
+    SELECT_SCORE_GLOBAL(M[nA][nB], Ix[nA][nB], Iy[nA][nB]); \
+    if (score > maximum) maximum = score; \
+    result = PyFloat_FromDouble(maximum); \
+
+
+#define WATERMANSMITHBEYER_EXIT_SCORE \
+exit: \
+    if (M) { \
+        /* If M is NULL, then Ix is also NULL. */ \
+        if (Ix) { \
+            /* If Ix is NULL, then Iy is also NULL. */ \
+            if (Iy) { \
+                /* If Iy is NULL, then M[i], Ix[i], and Iy[i] are \
+                 * also NULL. */ \
+                for (i = 0; i <= nA; i++) { \
+                    if (!M[i]) break; \
+                    PyMem_Free(M[i]); \
+                    if (!Ix[i]) break; \
+                    PyMem_Free(Ix[i]); \
+                    if (!Iy[i]) break; \
+                    PyMem_Free(Iy[i]); \
+                } \
+                PyMem_Free(Iy); \
+            } \
+            PyMem_Free(Ix); \
+        } \
+        PyMem_Free(M); \
+    } \
+    if (!ok) return NULL; \
+    if (!result) return PyErr_NoMemory(); \
+    return result; \
+
+
+#define WATERMANSMITHBEYER_ENTER_ALIGN(mode) \
+    int i; \
+    int j = 0; \
+    int gap; \
+    int kA; \
+    int kB; \
+    const double epsilon = self->epsilon; \
+    Trace** M; \
+    TraceGapsWatermanSmithBeyer** gaps; \
+    double** M_row; \
+    double** Ix_row; \
+    double** Iy_row; \
+    int ng; \
+    int nm; \
+    double score; \
+    double gapscore; \
+    double temp; \
+    int trace; \
+    int* gapM; \
+    int* gapXY; \
+    int ok = 1; \
+    PathGenerator* paths = NULL; \
+    \
+    /* Waterman-Smith-Beyer algorithm */ \
+    paths = PathGenerator_create_WSB(nA, nB, mode, strand); \
+    if (!paths) return NULL; \
+    M = paths->M; \
+    gaps = paths->gaps.waterman_smith_beyer; \
+    M_row = PyMem_Malloc((nA+1)*sizeof(double*)); \
+    if (!M_row) goto exit; \
+    Ix_row = PyMem_Malloc((nA+1)*sizeof(double*)); \
+    if (!Ix_row) goto exit; \
+    Iy_row = PyMem_Malloc((nA+1)*sizeof(double*)); \
+    if (!Iy_row) goto exit; \
+    for (i = 0; i <= nA; i++) { \
+        M_row[i] = PyMem_Malloc((nB+1)*sizeof(double)); \
+        if (!M_row[i]) goto exit; \
+        Ix_row[i] = PyMem_Malloc((nB+1)*sizeof(double)); \
+        if (!Ix_row[i]) goto exit; \
+        Iy_row[i] = PyMem_Malloc((nB+1)*sizeof(double)); \
+        if (!Iy_row[i]) goto exit; \
+    } \
+
+
+#define WATERMANSMITHBEYER_GLOBAL_ALIGN(align_score, query_gap_start) \
+    M_row[0][0] = 0; \
+    Ix_row[0][0] = -DBL_MAX; \
+    Iy_row[0][0] = -DBL_MAX; \
+    for (i = 1; i <= nA; i++) { \
+        M_row[i][0] = -DBL_MAX; \
+        Iy_row[i][0] = -DBL_MAX; \
+        ok = _call_query_gap_function(self, query_gap_start, i, &score); \
+        if (!ok) goto exit; \
+        Ix_row[i][0] = score; \
+    } \
+    for (j = 1; j <= nB; j++) { \
+        M_row[0][j] = -DBL_MAX; \
+        Ix_row[0][j] = -DBL_MAX; \
+        ok = _call_target_gap_function(self, 0, j, &score); \
+        if (!ok) goto exit; \
+        Iy_row[0][j] = score; \
+    } \
+    for (i = 1; i <= nA; i++) { \
+        kA = sA[i-1]; \
+        for (j = 1; j <= nB; j++) { \
+            kB = sB[j-1]; \
+            SELECT_TRACE_WATERMAN_SMITH_BEYER_GLOBAL_ALIGN((align_score)); \
+            gapM = PyMem_Malloc((i+1)*sizeof(int)); \
+            if (!gapM) goto exit; \
+            gaps[i][j].MIx = gapM; \
+            gapXY = PyMem_Malloc((i+1)*sizeof(int)); \
+            if (!gapXY) goto exit; \
+            gaps[i][j].IyIx = gapXY; \
+            nm = 0; \
+            ng = 0; \
+            score = -DBL_MAX; \
+            for (gap = 1; gap <= i; gap++) { \
+                ok = _call_query_gap_function(self, query_gap_start, gap, &gapscore); \
+                if (!ok) goto exit; \
+                SELECT_TRACE_WATERMAN_SMITH_BEYER_GAP(M_row[i-gap][j], \
+                                                      Iy_row[i-gap][j]); \
+            } \
+            gapM = PyMem_Realloc(gapM, (nm+1)*sizeof(int)); \
+            if (!gapM) goto exit; \
+            gaps[i][j].MIx = gapM; \
+            gapM[nm] = 0; \
+            gapXY = PyMem_Realloc(gapXY, (ng+1)*sizeof(int)); \
+            if (!gapXY) goto exit; \
+            gapXY[ng] = 0; \
+            gaps[i][j].IyIx = gapXY; \
+            Ix_row[i][j] = score; \
+            gapM = PyMem_Malloc((j+1)*sizeof(int)); \
+            if (!gapM) goto exit; \
+            gaps[i][j].MIy = gapM; \
+            gapXY = PyMem_Malloc((j+1)*sizeof(int)); \
+            if (!gapXY) goto exit; \
+            gaps[i][j].IxIy = gapXY; \
+            nm = 0; \
+            ng = 0; \
+            score = -DBL_MAX; \
+            for (gap = 1; gap <= j; gap++) { \
+                ok = _call_target_gap_function(self, i, gap, &gapscore); \
+                if (!ok) goto exit; \
+                SELECT_TRACE_WATERMAN_SMITH_BEYER_GAP(M_row[i][j-gap], \
+                                                      Ix_row[i][j-gap]); \
+            } \
+            Iy_row[i][j] = score; \
+            gapM = PyMem_Realloc(gapM, (nm+1)*sizeof(int)); \
+            if (!gapM) goto exit; \
+            gaps[i][j].MIy = gapM; \
+            gapM[nm] = 0; \
+            gapXY = PyMem_Realloc(gapXY, (ng+1)*sizeof(int)); \
+            if (!gapXY) goto exit; \
+            gaps[i][j].IxIy = gapXY; \
+            gapXY[ng] = 0; \
+        } \
+    } \
+    /* traceback */ \
+    SELECT_SCORE_GLOBAL(M_row[nA][nB], Ix_row[nA][nB], Iy_row[nA][nB]); \
+    M[nA][nB].path = 0; \
+    if (M_row[nA][nB] < score - epsilon) M[nA][nB].trace = 0; \
+    if (Ix_row[nA][nB] < score - epsilon) { \
+        gapM = PyMem_Realloc(gaps[nA][nB].MIx, sizeof(int)); \
+        if (!gapM) goto exit; \
+        gapM[0] = 0; \
+        gaps[nA][nB].MIx = gapM; \
+        gapXY = PyMem_Realloc(gaps[nA][nB].IyIx, sizeof(int)); \
+        if (!gapXY) goto exit; \
+        gapXY[0] = 0; \
+        gaps[nA][nB].IyIx = gapXY; \
+    } \
+    if (Iy_row[nA][nB] < score - epsilon) { \
+        gapM = PyMem_Realloc(gaps[nA][nB].MIy, sizeof(int)); \
+        if (!gapM) goto exit; \
+        gapM[0] = 0; \
+        gaps[nA][nB].MIy = gapM; \
+        gapXY = PyMem_Realloc(gaps[nA][nB].IxIy, sizeof(int)); \
+        if (!gapXY) goto exit; \
+        gapXY[0] = 0; \
+        gaps[nA][nB].IxIy = gapXY; \
+    } \
+    for (i = 0; i <= nA; i++) { \
+        PyMem_Free(M_row[i]); \
+        PyMem_Free(Ix_row[i]); \
+        PyMem_Free(Iy_row[i]); \
+    } \
+    PyMem_Free(M_row); \
+    PyMem_Free(Ix_row); \
+    PyMem_Free(Iy_row); \
+    return Py_BuildValue("fN", score, paths); \
+
+
+#define WATERMANSMITHBEYER_LOCAL_ALIGN(align_score, query_gap_start) \
+    M_row[0][0] = 0; \
+    Ix_row[0][0] = -DBL_MAX; \
+    Iy_row[0][0] = -DBL_MAX; \
+    for (i = 1; i <= nA; i++) { \
+        M_row[i][0] = 0; \
+        Ix_row[i][0] = -DBL_MAX; \
+        Iy_row[i][0] = -DBL_MAX; \
+    } \
+    for (i = 1; i <= nB; i++) { \
+        M_row[0][i] = 0; \
+        Ix_row[0][i] = -DBL_MAX; \
+        Iy_row[0][i] = -DBL_MAX; \
+    } \
+    for (i = 1; i <= nA; i++) { \
+        kA = sA[i-1]; \
+        for (j = 1; j <= nB; j++) { \
+            kB = sB[j-1]; \
+            nm = 0; \
+            ng = 0; \
+            SELECT_TRACE_WATERMAN_SMITH_BEYER_ALIGN( \
+                                           M_row[i-1][j-1], \
+                                           Ix_row[i-1][j-1], \
+                                           Iy_row[i-1][j-1], \
+                                           (align_score)); \
+            M[i][j].path = 0; \
+            if (i == nA || j == nB) { \
+                Ix_row[i][j] = score; \
+                gaps[i][j].MIx
= NULL; \ + gaps[i][j].IyIx = NULL; \ + gaps[i][j].MIy = NULL; \ + gaps[i][j].IxIy = NULL; \ + Iy_row[i][j] = score; \ + continue; \ + } \ + gapM = PyMem_Malloc((i+1)*sizeof(int)); \ + if (!gapM) goto exit; \ + gaps[i][j].MIx = gapM; \ + gapXY = PyMem_Malloc((i+1)*sizeof(int)); \ + if (!gapXY) goto exit; \ + gaps[i][j].IyIx = gapXY; \ + score = -DBL_MAX; \ + for (gap = 1; gap <= i; gap++) { \ + ok = _call_query_gap_function(self, query_gap_start, gap, &gapscore); \ + if (!ok) goto exit; \ + SELECT_TRACE_WATERMAN_SMITH_BEYER_GAP(M_row[i-gap][j], \ + Iy_row[i-gap][j]); \ + } \ + if (score < epsilon) { \ + score = -DBL_MAX; \ + nm = 0; \ + ng = 0; \ + } \ + else if (score > maximum) maximum = score; \ + gapM[nm] = 0; \ + gapXY[ng] = 0; \ + Ix_row[i][j] = score; \ + M[i][j].path = 0; \ + gapM = PyMem_Realloc(gapM, (nm+1)*sizeof(int)); \ + if (!gapM) goto exit; \ + gaps[i][j].MIx = gapM; \ + gapM[nm] = 0; \ + gapXY = PyMem_Realloc(gapXY, (ng+1)*sizeof(int)); \ + if (!gapXY) goto exit; \ + gaps[i][j].IyIx = gapXY; \ + gapXY[ng] = 0; \ + gapM = PyMem_Malloc((j+1)*sizeof(int)); \ + if (!gapM) goto exit; \ + gaps[i][j].MIy = gapM; \ + gapXY = PyMem_Malloc((j+1)*sizeof(int)); \ + if (!gapXY) goto exit; \ + gaps[i][j].IxIy = gapXY; \ + nm = 0; \ + ng = 0; \ + score = -DBL_MAX; \ + gapM[0] = 0; \ + for (gap = 1; gap <= j; gap++) { \ + ok = _call_target_gap_function(self, i, gap, &gapscore); \ + if (!ok) goto exit; \ + SELECT_TRACE_WATERMAN_SMITH_BEYER_GAP(M_row[i][j-gap], \ + Ix_row[i][j-gap]); \ + } \ + if (score < epsilon) { \ + score = -DBL_MAX; \ + nm = 0; \ + ng = 0; \ + } \ + else if (score > maximum) maximum = score; \ + gapM = PyMem_Realloc(gapM, (nm+1)*sizeof(int)); \ + if (!gapM) goto exit; \ + gaps[i][j].MIy = gapM; \ + gapXY = PyMem_Realloc(gapXY, (ng+1)*sizeof(int)); \ + if (!gapXY) goto exit; \ + gaps[i][j].IxIy = gapXY; \ + gapM[nm] = 0; \ + gapXY[ng] = 0; \ + Iy_row[i][j] = score; \ + M[i][j].path = 0; \ + } \ + } \ + for (i = 0; i <= nA; i++) PyMem_Free(M_row[i]); \ + PyMem_Free(M_row); \ + for (i = 0; i <= nA; i++) PyMem_Free(Ix_row[i]); \ + PyMem_Free(Ix_row); \ + for (i = 0; i <= nA; i++) PyMem_Free(Iy_row[i]); \ + PyMem_Free(Iy_row); \ +\ + /* As we don't allow zero-score extensions to alignments, \ + * we need to remove all traces towards an ENDPOINT. \ + * In addition, some points then won't have any path to a STARTPOINT. \ + * Here, use path as a temporary variable to indicate if the point \ + * is reachable from a STARTPOINT. If it is unreachable, remove all \ + * traces from it, and don't allow it to be an ENDPOINT. It may still \ + * be a valid STARTPOINT. */ \ + for (j = 0; j <= nB; j++) M[0][j].path = M_MATRIX; \ + for (i = 1; i <= nA; i++) { \ + M[i][0].path = M_MATRIX; \ + for (j = 1; j <= nB; j++) { \ + /* Remove traces to unreachable points. */ \ + trace = M[i][j].trace; \ + if (!(M[i-1][j-1].path & M_MATRIX)) trace &= ~M_MATRIX; \ + if (!(M[i-1][j-1].path & Ix_MATRIX)) trace &= ~Ix_MATRIX; \ + if (!(M[i-1][j-1].path & Iy_MATRIX)) trace &= ~Iy_MATRIX; \ + if (trace & (STARTPOINT | M_MATRIX | Ix_MATRIX | Iy_MATRIX)) { \ + /* The point is reachable. */ \ + if (trace & ENDPOINT) M[i][j].path = 0; /* no extensions after ENDPOINT */ \ + else M[i][j].path |= M_MATRIX; \ + } \ + else { \ + /* The point is not reachable. Then it is not a STARTPOINT, \ + * all traces from it can be removed, and it cannot act as \ + * an ENDPOINT. 
*/ \ + M[i][j].path &= ~M_MATRIX; \ + trace = 0; \ + } \ + M[i][j].trace = trace; \ + if (i == nA || j == nB) continue; \ + gapM = gaps[i][j].MIx; \ + gapXY = gaps[i][j].IyIx; \ + nm = 0; \ + ng = 0; \ + for (im = 0; (gap = gapM[im]); im++) \ + if (M[i-gap][j].path & M_MATRIX) gapM[nm++] = gap; \ + gapM = PyMem_Realloc(gapM, (nm+1)*sizeof(int)); \ + if (!gapM) goto exit; \ + gapM[nm] = 0; \ + gaps[i][j].MIx = gapM; \ + for (im = 0; (gap = gapXY[im]); im++) \ + if (M[i-gap][j].path & Iy_MATRIX) gapXY[ng++] = gap; \ + gapXY = PyMem_Realloc(gapXY, (ng+1)*sizeof(int)); \ + if (!gapXY) goto exit; \ + gapXY[ng] = 0; \ + gaps[i][j].IyIx = gapXY; \ + if (nm==0 && ng==0) M[i][j].path &= ~Ix_MATRIX; /* not reachable */ \ + else M[i][j].path |= Ix_MATRIX; /* reachable */ \ + gapM = gaps[i][j].MIy; \ + gapXY = gaps[i][j].IxIy; \ + nm = 0; \ + ng = 0; \ + for (im = 0; (gap = gapM[im]); im++) \ + if (M[i][j-gap].path & M_MATRIX) gapM[nm++] = gap; \ + gapM = PyMem_Realloc(gapM, (nm+1)*sizeof(int)); \ + if (!gapM) goto exit; \ + gapM[nm] = 0; \ + gaps[i][j].MIy = gapM; \ + for (im = 0; (gap = gapXY[im]); im++) \ + if (M[i][j-gap].path & Ix_MATRIX) gapXY[ng++] = gap; \ + gapXY = PyMem_Realloc(gapXY, (ng+1)*sizeof(int)); \ + if (!gapXY) goto exit; \ + gapXY[ng] = 0; \ + gaps[i][j].IxIy = gapXY; \ + if (nm==0 && ng==0) M[i][j].path &= ~Iy_MATRIX; /* not reachable */ \ + else M[i][j].path |= Iy_MATRIX; /* reachable */ \ + } \ + } \ + /* traceback */ \ + if (maximum == 0) M[0][0].path = DONE; \ + else M[0][0].path = 0; \ + return Py_BuildValue("fN", maximum, paths); \ + + +#define WATERMANSMITHBEYER_EXIT_ALIGN \ +exit: \ + if (ok) /* otherwise, an exception was already set */ \ + PyErr_SetNone(PyExc_MemoryError); \ + Py_DECREF(paths); \ + if (M_row) { \ + /* If M is NULL, then Ix is also NULL. */ \ + if (Ix_row) { \ + /* If Ix is NULL, then Iy is also NULL. */ \ + if (Iy_row) { \ + /* If Iy is NULL, then M[i], Ix[i], and Iy[i] are also NULL. 
*/ \ + for (i = 0; i <= nA; i++) { \ + if (!M_row[i]) break; \ + PyMem_Free(M_row[i]); \ + if (!Ix_row[i]) break; \ + PyMem_Free(Ix_row[i]); \ + if (!Iy_row[i]) break; \ + PyMem_Free(Iy_row[i]); \ + } \ + PyMem_Free(Iy_row); \ + } \ + PyMem_Free(Ix_row); \ + } \ + PyMem_Free(M_row); \ + } \ + return NULL; \ + + +/* -------------- allocation & deallocation ------------- */ + +static PathGenerator* +PathGenerator_create_NWSW(Py_ssize_t nA, Py_ssize_t nB, Mode mode, unsigned char strand) +{ + int i; + unsigned char trace = 0; + Trace** M; + PathGenerator* paths; + + paths = (PathGenerator*)PyType_GenericAlloc(&PathGenerator_Type, 0); + if (!paths) return NULL; + + paths->iA = 0; + paths->iB = 0; + paths->nA = nA; + paths->nB = nB; + paths->M = NULL; + paths->gaps.gotoh = NULL; + paths->gaps.waterman_smith_beyer = NULL; + paths->algorithm = NeedlemanWunschSmithWaterman; + paths->mode = mode; + paths->length = 0; + paths->strand = strand; + + M = PyMem_Malloc((nA+1)*sizeof(Trace*)); + paths->M = M; + if (!M) goto exit; + switch (mode) { + case Global: trace = VERTICAL; break; + case Local: trace = STARTPOINT; break; + } + for (i = 0; i <= nA; i++) { + M[i] = PyMem_Malloc((nB+1)*sizeof(Trace)); + if (!M[i]) goto exit; + M[i][0].trace = trace; + } + if (mode == Global) { + M[0][0].trace = 0; + trace = HORIZONTAL; + } + for (i = 1; i <= nB; i++) M[0][i].trace = trace; + M[0][0].path = 0; + return paths; +exit: + Py_DECREF(paths); + PyErr_SetNone(PyExc_MemoryError); + return NULL; +} + +static PathGenerator* +PathGenerator_create_Gotoh(Py_ssize_t nA, Py_ssize_t nB, Mode mode, unsigned char strand) +{ + int i; + unsigned char trace; + Trace** M; + TraceGapsGotoh** gaps; + PathGenerator* paths; + + switch (mode) { + case Global: trace = 0; break; + case Local: trace = STARTPOINT; break; + default: + /* Should not happen, but the compiler has no way of knowing that, + * as the enum Mode does not restrict the value of mode, which can + * be any integer. Include default: here to prevent compiler + * warnings. 
+ */ + PyErr_Format(PyExc_RuntimeError, + "mode has unexpected value %d", mode); + return NULL; + } + + paths = (PathGenerator*)PyType_GenericAlloc(&PathGenerator_Type, 0); + if (!paths) return NULL; + + paths->iA = 0; + paths->iB = 0; + paths->nA = nA; + paths->nB = nB; + paths->M = NULL; + paths->gaps.gotoh = NULL; + paths->algorithm = Gotoh; + paths->mode = mode; + paths->length = 0; + paths->strand = strand; + + M = PyMem_Malloc((nA+1)*sizeof(Trace*)); + if (!M) goto exit; + paths->M = M; + for (i = 0; i <= nA; i++) { + M[i] = PyMem_Malloc((nB+1)*sizeof(Trace)); + if (!M[i]) goto exit; + M[i][0].trace = trace; + } + gaps = PyMem_Malloc((nA+1)*sizeof(TraceGapsGotoh*)); + if (!gaps) goto exit; + paths->gaps.gotoh = gaps; + for (i = 0; i <= nA; i++) { + gaps[i] = PyMem_Malloc((nB+1)*sizeof(TraceGapsGotoh)); + if (!gaps[i]) goto exit; + } + + gaps[0][0].Ix = 0; + gaps[0][0].Iy = 0; + if (mode == Global) { + for (i = 1; i <= nA; i++) { + gaps[i][0].Ix = Ix_MATRIX; + gaps[i][0].Iy = 0; + } + gaps[1][0].Ix = M_MATRIX; + for (i = 1; i <= nB; i++) { + M[0][i].trace = 0; + gaps[0][i].Ix = 0; + gaps[0][i].Iy = Iy_MATRIX; + } + gaps[0][1].Iy = M_MATRIX; + } + else if (mode == Local) { + for (i = 1; i < nA; i++) { + gaps[i][0].Ix = 0; + gaps[i][0].Iy = 0; + } + for (i = 1; i <= nB; i++) { + M[0][i].trace = trace; + gaps[0][i].Ix = 0; + gaps[0][i].Iy = 0; + } + } + M[0][0].path = 0; + + return paths; +exit: + Py_DECREF(paths); + PyErr_SetNone(PyExc_MemoryError); + return NULL; +} + +static PathGenerator* +PathGenerator_create_WSB(Py_ssize_t nA, Py_ssize_t nB, Mode mode, unsigned char strand) +{ + int i, j; + int* trace; + Trace** M = NULL; + TraceGapsWatermanSmithBeyer** gaps = NULL; + PathGenerator* paths; + + paths = (PathGenerator*)PyType_GenericAlloc(&PathGenerator_Type, 0); + if (!paths) return NULL; + + paths->iA = 0; + paths->iB = 0; + paths->nA = nA; + paths->nB = nB; + paths->M = NULL; + paths->gaps.waterman_smith_beyer = NULL; + paths->algorithm = WatermanSmithBeyer; + paths->mode = mode; + paths->length = 0; + paths->strand = strand; + + M = PyMem_Malloc((nA+1)*sizeof(Trace*)); + if (!M) goto exit; + paths->M = M; + for (i = 0; i <= nA; i++) { + M[i] = PyMem_Malloc((nB+1)*sizeof(Trace)); + if (!M[i]) goto exit; + } + gaps = PyMem_Malloc((nA+1)*sizeof(TraceGapsWatermanSmithBeyer*)); + if (!gaps) goto exit; + paths->gaps.waterman_smith_beyer = gaps; + for (i = 0; i <= nA; i++) gaps[i] = NULL; + for (i = 0; i <= nA; i++) { + gaps[i] = PyMem_Malloc((nB+1)*sizeof(TraceGapsWatermanSmithBeyer)); + if (!gaps[i]) goto exit; + for (j = 0; j <= nB; j++) { + gaps[i][j].MIx = NULL; + gaps[i][j].IyIx = NULL; + gaps[i][j].MIy = NULL; + gaps[i][j].IxIy = NULL; + } + M[i][0].path = 0; + switch (mode) { + case Global: + M[i][0].trace = 0; + trace = PyMem_Malloc(2*sizeof(int)); + if (!trace) goto exit; + gaps[i][0].MIx = trace; + trace[0] = i; + trace[1] = 0; + trace = PyMem_Malloc(sizeof(int)); + if (!trace) goto exit; + gaps[i][0].IyIx = trace; + trace[0] = 0; + break; + case Local: + M[i][0].trace = STARTPOINT; + break; + } + } + for (i = 1; i <= nB; i++) { + switch (mode) { + case Global: + M[0][i].trace = 0; + trace = PyMem_Malloc(2*sizeof(int)); + if (!trace) goto exit; + gaps[0][i].MIy = trace; + trace[0] = i; + trace[1] = 0; + trace = PyMem_Malloc(sizeof(int)); + if (!trace) goto exit; + gaps[0][i].IxIy = trace; + trace[0] = 0; + break; + case Local: + M[0][i].trace = STARTPOINT; + break; + } + } + M[0][0].path = 0; + return paths; +exit: + Py_DECREF(paths); + PyErr_SetNone(PyExc_MemoryError); + 
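+    /* Py_DECREF(paths) above hands cleanup to the PathGenerator
+     * deallocator, which releases whichever M and gap rows were
+     * successfully allocated before the failing PyMem_Malloc call. */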
return NULL; +} + +/* ----------------- alignment algorithms ----------------- */ + +#define MATRIX_SCORE scores[kA*n+kB] +#define COMPARE_SCORE (kA == wildcard || kB == wildcard) ? 0 : (kA == kB) ? match : mismatch + + +static PyObject* +Aligner_needlemanwunsch_score_compare(Aligner* self, + const int* sA, Py_ssize_t nA, + const int* sB, Py_ssize_t nB, + unsigned char strand) +{ + const double match = self->match; + const double mismatch = self->mismatch; + const int wildcard = self->wildcard; + NEEDLEMANWUNSCH_SCORE(COMPARE_SCORE); +} + +static PyObject* +Aligner_needlemanwunsch_score_matrix(Aligner* self, + const int* sA, Py_ssize_t nA, + const int* sB, Py_ssize_t nB, + unsigned char strand) +{ + const Py_ssize_t n = self->substitution_matrix.shape[0]; + const double* scores = self->substitution_matrix.buf; + NEEDLEMANWUNSCH_SCORE(MATRIX_SCORE); +} + +static PyObject* +Aligner_smithwaterman_score_compare(Aligner* self, + const int* sA, Py_ssize_t nA, + const int* sB, Py_ssize_t nB) +{ + const double match = self->match; + const double mismatch = self->mismatch; + const int wildcard = self->wildcard; + SMITHWATERMAN_SCORE(COMPARE_SCORE); +} + +static PyObject* +Aligner_smithwaterman_score_matrix(Aligner* self, + const int* sA, Py_ssize_t nA, + const int* sB, Py_ssize_t nB) +{ + const Py_ssize_t n = self->substitution_matrix.shape[0]; + const double* scores = self->substitution_matrix.buf; + SMITHWATERMAN_SCORE(MATRIX_SCORE); +} + +static PyObject* +Aligner_needlemanwunsch_align_compare(Aligner* self, + const int* sA, Py_ssize_t nA, + const int* sB, Py_ssize_t nB, + unsigned char strand) +{ + const double match = self->match; + const double mismatch = self->mismatch; + const int wildcard = self->wildcard; + NEEDLEMANWUNSCH_ALIGN(COMPARE_SCORE); +} + +static PyObject* +Aligner_needlemanwunsch_align_matrix(Aligner* self, + const int* sA, Py_ssize_t nA, + const int* sB, Py_ssize_t nB, + unsigned char strand) +{ + const Py_ssize_t n = self->substitution_matrix.shape[0]; + const double* scores = self->substitution_matrix.buf; + NEEDLEMANWUNSCH_ALIGN(MATRIX_SCORE); +} + +static PyObject* +Aligner_smithwaterman_align_compare(Aligner* self, + const int* sA, Py_ssize_t nA, + const int* sB, Py_ssize_t nB, + unsigned char strand) +{ + const double match = self->match; + const double mismatch = self->mismatch; + const int wildcard = self->wildcard; + SMITHWATERMAN_ALIGN(COMPARE_SCORE); +} + +static PyObject* +Aligner_smithwaterman_align_matrix(Aligner* self, + const int* sA, Py_ssize_t nA, + const int* sB, Py_ssize_t nB, + unsigned char strand) +{ + const Py_ssize_t n = self->substitution_matrix.shape[0]; + const double* scores = self->substitution_matrix.buf; + SMITHWATERMAN_ALIGN(MATRIX_SCORE); +} + +static PyObject* +Aligner_gotoh_global_score_compare(Aligner* self, + const int* sA, Py_ssize_t nA, + const int* sB, Py_ssize_t nB, + unsigned char strand) +{ + const double match = self->match; + const double mismatch = self->mismatch; + const int wildcard = self->wildcard; + GOTOH_GLOBAL_SCORE(COMPARE_SCORE); +} + +static PyObject* +Aligner_gotoh_global_score_matrix(Aligner* self, + const int* sA, Py_ssize_t nA, + const int* sB, Py_ssize_t nB, + unsigned char strand) +{ + const Py_ssize_t n = self->substitution_matrix.shape[0]; + const double* scores = self->substitution_matrix.buf; + GOTOH_GLOBAL_SCORE(MATRIX_SCORE); +} + +static PyObject* +Aligner_gotoh_local_score_compare(Aligner* self, + const int* sA, Py_ssize_t nA, + const int* sB, Py_ssize_t nB) +{ + const double match = self->match; + const 
double mismatch = self->mismatch; + const int wildcard = self->wildcard; + GOTOH_LOCAL_SCORE(COMPARE_SCORE); +} + +static PyObject* +Aligner_gotoh_local_score_matrix(Aligner* self, + const int* sA, Py_ssize_t nA, + const int* sB, Py_ssize_t nB) +{ + const Py_ssize_t n = self->substitution_matrix.shape[0]; + const double* scores = self->substitution_matrix.buf; + GOTOH_LOCAL_SCORE(MATRIX_SCORE); +} + +static PyObject* +Aligner_gotoh_global_align_compare(Aligner* self, + const int* sA, Py_ssize_t nA, + const int* sB, Py_ssize_t nB, + unsigned char strand) +{ + const double match = self->match; + const double mismatch = self->mismatch; + const int wildcard = self->wildcard; + GOTOH_GLOBAL_ALIGN(COMPARE_SCORE); +} + +static PyObject* +Aligner_gotoh_global_align_matrix(Aligner* self, + const int* sA, Py_ssize_t nA, + const int* sB, Py_ssize_t nB, + unsigned char strand) +{ + const Py_ssize_t n = self->substitution_matrix.shape[0]; + const double* scores = self->substitution_matrix.buf; + GOTOH_GLOBAL_ALIGN(MATRIX_SCORE); +} + +static PyObject* +Aligner_gotoh_local_align_compare(Aligner* self, + const int* sA, Py_ssize_t nA, + const int* sB, Py_ssize_t nB, + unsigned char strand) +{ + const double match = self->match; + const double mismatch = self->mismatch; + const int wildcard = self->wildcard; + GOTOH_LOCAL_ALIGN(COMPARE_SCORE); +} + +static PyObject* +Aligner_gotoh_local_align_matrix(Aligner* self, + const int* sA, Py_ssize_t nA, + const int* sB, Py_ssize_t nB, + unsigned char strand) +{ + const Py_ssize_t n = self->substitution_matrix.shape[0]; + const double* scores = self->substitution_matrix.buf; + GOTOH_LOCAL_ALIGN(MATRIX_SCORE); +} + +static int +_call_query_gap_function(Aligner* aligner, int i, int j, double* score) +{ + double value; + PyObject* result; + PyObject* function = aligner->query_gap_function; + if (!function) + value = aligner->query_internal_open_gap_score + + (j-1) * aligner->query_internal_extend_gap_score; + else { + result = PyObject_CallFunction(function, "ii", i, j); + if (result == NULL) return 0; + value = PyFloat_AsDouble(result); + Py_DECREF(result); + if (value == -1.0 && PyErr_Occurred()) return 0; + } + *score = value; + return 1; +} + +static int +_call_target_gap_function(Aligner* aligner, int i, int j, double* score) +{ + double value; + PyObject* result; + PyObject* function = aligner->target_gap_function; + if (!function) + value = aligner->target_internal_open_gap_score + + (j-1) * aligner->target_internal_extend_gap_score; + else { + result = PyObject_CallFunction(function, "ii", i, j); + if (result == NULL) return 0; + value = PyFloat_AsDouble(result); + Py_DECREF(result); + if (value == -1.0 && PyErr_Occurred()) return 0; + } + *score = value; + return 1; +} + +static PyObject* +Aligner_watermansmithbeyer_global_score_compare(Aligner* self, + const int* sA, Py_ssize_t nA, + const int* sB, Py_ssize_t nB, + unsigned char strand) +{ + const double match = self->match; + const double mismatch = self->mismatch; + const int wildcard = self->wildcard; + WATERMANSMITHBEYER_ENTER_SCORE; + switch (strand) { + case '+': { + WATERMANSMITHBEYER_GLOBAL_SCORE(COMPARE_SCORE, j); + break; + } + case '-': { + WATERMANSMITHBEYER_GLOBAL_SCORE(COMPARE_SCORE, nB-j); + break; + } + } + WATERMANSMITHBEYER_EXIT_SCORE; +} + +static PyObject* +Aligner_watermansmithbeyer_global_score_matrix(Aligner* self, + const int* sA, Py_ssize_t nA, + const int* sB, Py_ssize_t nB, + unsigned char strand) +{ + const Py_ssize_t n = self->substitution_matrix.shape[0]; + const double* scores 
= self->substitution_matrix.buf; + WATERMANSMITHBEYER_ENTER_SCORE; + switch (strand) { + case '+': + WATERMANSMITHBEYER_GLOBAL_SCORE(MATRIX_SCORE, j); + break; + case '-': + WATERMANSMITHBEYER_GLOBAL_SCORE(MATRIX_SCORE, nB-j); + break; + } + WATERMANSMITHBEYER_EXIT_SCORE; +} + +static PyObject* +Aligner_watermansmithbeyer_local_score_compare(Aligner* self, + const int* sA, Py_ssize_t nA, + const int* sB, Py_ssize_t nB, + unsigned char strand) +{ + const double match = self->match; + const double mismatch = self->mismatch; + const int wildcard = self->wildcard; + double maximum = 0.0; + WATERMANSMITHBEYER_ENTER_SCORE; + switch (strand) { + case '+': { + WATERMANSMITHBEYER_LOCAL_SCORE(COMPARE_SCORE, j); + break; + } + case '-': { + WATERMANSMITHBEYER_LOCAL_SCORE(COMPARE_SCORE, nB-j); + break; + } + } + WATERMANSMITHBEYER_EXIT_SCORE; +} + +static PyObject* +Aligner_watermansmithbeyer_local_score_matrix(Aligner* self, + const int* sA, Py_ssize_t nA, + const int* sB, Py_ssize_t nB, + unsigned char strand) +{ + const Py_ssize_t n = self->substitution_matrix.shape[0]; + const double* scores = self->substitution_matrix.buf; + double maximum = 0.0; + WATERMANSMITHBEYER_ENTER_SCORE; + switch (strand) { + case '+': { + WATERMANSMITHBEYER_LOCAL_SCORE(MATRIX_SCORE, j); + break; + } + case '-': { + WATERMANSMITHBEYER_LOCAL_SCORE(MATRIX_SCORE, nB-j); + break; + } + } + WATERMANSMITHBEYER_EXIT_SCORE; +} + +static PyObject* +Aligner_watermansmithbeyer_global_align_compare(Aligner* self, + const int* sA, Py_ssize_t nA, + const int* sB, Py_ssize_t nB, + unsigned char strand) +{ + const double match = self->match; + const double mismatch = self->mismatch; + const int wildcard = self->wildcard; + WATERMANSMITHBEYER_ENTER_ALIGN(Global); + switch (strand) { + case '+': { + WATERMANSMITHBEYER_GLOBAL_ALIGN(COMPARE_SCORE, j); + break; + } + case '-': { + WATERMANSMITHBEYER_GLOBAL_ALIGN(COMPARE_SCORE, nB-j); + break; + } + } + WATERMANSMITHBEYER_EXIT_ALIGN; +} + +static PyObject* +Aligner_watermansmithbeyer_global_align_matrix(Aligner* self, + const int* sA, Py_ssize_t nA, + const int* sB, Py_ssize_t nB, + unsigned char strand) +{ + const Py_ssize_t n = self->substitution_matrix.shape[0]; + const double* scores = self->substitution_matrix.buf; + WATERMANSMITHBEYER_ENTER_ALIGN(Global); + switch (strand) { + case '+': { + WATERMANSMITHBEYER_GLOBAL_ALIGN(MATRIX_SCORE, j); + break; + } + case '-': { + WATERMANSMITHBEYER_GLOBAL_ALIGN(MATRIX_SCORE, nB-j); + break; + } + } + WATERMANSMITHBEYER_EXIT_ALIGN; +} + +static PyObject* +Aligner_watermansmithbeyer_local_align_compare(Aligner* self, + const int* sA, Py_ssize_t nA, + const int* sB, Py_ssize_t nB, + unsigned char strand) +{ + const double match = self->match; + const double mismatch = self->mismatch; + const int wildcard = self->wildcard; + int im = nA; + int jm = nB; + double maximum = 0; + WATERMANSMITHBEYER_ENTER_ALIGN(Local); + switch (strand) { + case '+': { + WATERMANSMITHBEYER_LOCAL_ALIGN(COMPARE_SCORE, j); + break; + } + case '-': { + WATERMANSMITHBEYER_LOCAL_ALIGN(COMPARE_SCORE, nB-j); + break; + } + } + WATERMANSMITHBEYER_EXIT_ALIGN; +} + +static PyObject* +Aligner_watermansmithbeyer_local_align_matrix(Aligner* self, + const int* sA, Py_ssize_t nA, + const int* sB, Py_ssize_t nB, + unsigned char strand) +{ + const Py_ssize_t n = self->substitution_matrix.shape[0]; + const double* scores = self->substitution_matrix.buf; + int im = nA; + int jm = nB; + double maximum = 0; + WATERMANSMITHBEYER_ENTER_ALIGN(Local); + switch (strand) { + case '+': { + 
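+            /* The second macro argument is the gap position reported to a
+             * user-supplied query gap function: j on the '+' strand, and
+             * nB-j in the '-' strand case below, so positions always refer
+             * to the original orientation of the query. */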
WATERMANSMITHBEYER_LOCAL_ALIGN(MATRIX_SCORE, j); + break; + } + case '-': { + WATERMANSMITHBEYER_LOCAL_ALIGN(MATRIX_SCORE, nB-j); + break; + } + } + WATERMANSMITHBEYER_EXIT_ALIGN; +} + +static int* +convert_1bytes_to_ints(const int mapping[], Py_ssize_t n, const unsigned char s[]) +{ + unsigned char c; + Py_ssize_t i; + int index; + int* indices; + if (n == 0) { + PyErr_SetString(PyExc_ValueError, "sequence has zero length"); + return NULL; + } + indices = PyMem_Malloc(n*sizeof(int)); + if (!indices) { + PyErr_NoMemory(); + return NULL; + } + if (!mapping) for (i = 0; i < n; i++) indices[i] = s[i]; + else { + for (i = 0; i < n; i++) { + c = s[i]; + index = mapping[(int)c]; + if (index == MISSING_LETTER) { + PyErr_SetString(PyExc_ValueError, + "sequence contains letters not in the alphabet"); + PyMem_Free(indices); + return NULL; + } + indices[i] = index; + } + } + return indices; +} + +static int* +convert_2bytes_to_ints(const int mapping[], Py_ssize_t n, const Py_UCS2 s[]) +{ + unsigned char c; + Py_ssize_t i; + int index; + int* indices; + if (n == 0) { + PyErr_SetString(PyExc_ValueError, "sequence has zero length"); + return NULL; + } + indices = PyMem_Malloc(n*sizeof(int)); + if (!indices) { + PyErr_NoMemory(); + return NULL; + } + if (!mapping) for (i = 0; i < n; i++) indices[i] = s[i]; + else { + for (i = 0; i < n; i++) { + c = s[i]; + index = mapping[(int)c]; + if (index == MISSING_LETTER) { + PyErr_SetString(PyExc_ValueError, + "sequence contains letters not in the alphabet"); + PyMem_Free(indices); + return NULL; + } + indices[i] = index; + } + } + return indices; +} + +static int* +convert_4bytes_to_ints(const int mapping[], Py_ssize_t n, const Py_UCS4 s[]) +{ + unsigned char c; + Py_ssize_t i; + int index; + int* indices; + if (n == 0) { + PyErr_SetString(PyExc_ValueError, "sequence has zero length"); + return NULL; + } + indices = PyMem_Malloc(n*sizeof(int)); + if (!indices) { + PyErr_NoMemory(); + return NULL; + } + if (!mapping) for (i = 0; i < n; i++) indices[i] = s[i]; + else { + for (i = 0; i < n; i++) { + c = s[i]; + index = mapping[(int)c]; + if (index == MISSING_LETTER) { + PyErr_SetString(PyExc_ValueError, + "sequence contains letters not in the alphabet"); + PyMem_Free(indices); + return NULL; + } + indices[i] = index; + } + } + return indices; +} + +static int +convert_objects_to_ints(Py_buffer* view, PyObject* alphabet, PyObject* sequence) +{ + Py_ssize_t i, j; + Py_ssize_t n; + Py_ssize_t m; + int* indices = NULL; + PyObject *obj1, *obj2; + int equal; + + view->buf = NULL; + sequence = PySequence_Fast(sequence, + "argument should support the sequence protocol"); + if (!sequence) return 0; + if (!alphabet) { + PyErr_SetString(PyExc_ValueError, + "alphabet is None; cannot interpret sequence"); + goto exit; + } + alphabet = PySequence_Fast(alphabet, NULL); /* should never fail */ + n = PySequence_Size(sequence); + m = PySequence_Size(alphabet); + indices = PyMem_Malloc(n*sizeof(int)); + if (!indices) { + PyErr_NoMemory(); + goto exit; + } + for (i = 0; i < n; i++) { + obj1 = PySequence_Fast_GET_ITEM(sequence, i); + for (j = 0; j < m; j++) { + obj2 = PySequence_Fast_GET_ITEM(alphabet, j); + equal = PyObject_RichCompareBool(obj1, obj2, Py_EQ); + if (equal == 1) /* obj1 == obj2 */ { + indices[i] = j; + break; + } + else if (equal == -1) /* error */ { + PyMem_Del(indices); + goto exit; + } + /* else (equal == 0) continue; */ /* not equal */ + } + if (j == m) { + PyErr_SetString(PyExc_ValueError, "failed to find object in alphabet"); + goto exit; + } + } + view->buf = 
indices; + view->itemsize = 1; + view->len = n; +exit: + Py_DECREF(sequence); + Py_XDECREF(alphabet); + if (view->buf) return 1; + return 0; +} + +static int +sequence_converter(PyObject* argument, void* pointer) +{ + Py_buffer* view = pointer; + Py_ssize_t i; + Py_ssize_t n; + int index; + int* indices; + const int flag = PyBUF_FORMAT | PyBUF_C_CONTIGUOUS; + Aligner* aligner; + int* mapping; + + if (argument == NULL) { + if (view->obj) PyBuffer_Release(view); + else { + indices = view->buf; + PyMem_Free(indices); + } + return 1; + } + + aligner = (Aligner*)view->obj; + view->obj = NULL; + + if (PyObject_GetBuffer(argument, view, flag) == 0) { + if (view->ndim != 1) { + PyErr_Format(PyExc_ValueError, + "sequence has incorrect rank (%d expected 1)", view->ndim); + return 0; + } + n = view->len / view->itemsize; + if (n == 0) { + PyErr_SetString(PyExc_ValueError, "sequence has zero length"); + return 0; + } + if (strcmp(view->format, "c") == 0 || strcmp(view->format, "B") == 0) { + if (view->itemsize != sizeof(char)) { + PyErr_Format(PyExc_ValueError, + "sequence has unexpected item byte size " + "(%ld, expected %ld)", view->itemsize, sizeof(char)); + return 0; + } + indices = convert_1bytes_to_ints(aligner->mapping, n, view->buf); + if (!indices) return 0; + PyBuffer_Release(view); + view->itemsize = 1; + view->len = n; + view->buf = indices; + return Py_CLEANUP_SUPPORTED; + } + if (strcmp(view->format, "i") == 0 || strcmp(view->format, "l") == 0) { + if (view->itemsize != sizeof(int)) { + PyErr_Format(PyExc_ValueError, + "sequence has unexpected item byte size " + "(%ld, expected %ld)", view->itemsize, sizeof(int)); + return 0; + } + indices = view->buf; + if (aligner->substitution_matrix.obj) { + const Py_ssize_t m = aligner->substitution_matrix.shape[0]; + for (i = 0; i < n; i++) { + index = indices[i]; + if (index < 0) { + PyErr_Format(PyExc_ValueError, + "sequence item %zd is negative (%d)", + i, index); + return 0; + } + if (index >= m) { + PyErr_Format(PyExc_ValueError, + "sequence item %zd is out of bound" + " (%d, should be < %zd)", i, index, m); + return 0; + } + } + } + return Py_CLEANUP_SUPPORTED; + } + PyErr_Format(PyExc_ValueError, + "sequence has incorrect data type '%s'", view->format); + return 0; + } + PyErr_Clear(); /* To clear the exception raised by PyObject_GetBuffer */ + mapping = aligner->mapping; + if (PyUnicode_Check(argument)) { + if (PyUnicode_READY(argument) == -1) return 0; + n = PyUnicode_GET_LENGTH(argument); + switch (PyUnicode_KIND(argument)) { + case PyUnicode_1BYTE_KIND: { + Py_UCS1* s = PyUnicode_1BYTE_DATA(argument); + indices = convert_1bytes_to_ints(mapping, n, (unsigned char*)s); + break; + } + case PyUnicode_2BYTE_KIND: { + Py_UCS2* s = PyUnicode_2BYTE_DATA(argument); + indices = convert_2bytes_to_ints(mapping, n, s); + break; + } + case PyUnicode_4BYTE_KIND: { + Py_UCS4* s = PyUnicode_4BYTE_DATA(argument); + indices = convert_4bytes_to_ints(mapping, n, s); + break; + } + case PyUnicode_WCHAR_KIND: + default: + PyErr_SetString(PyExc_ValueError, "could not interpret unicode data"); + return 0; + } + if (!indices) return 0; + view->buf = indices; + view->itemsize = 1; + view->len = n; + return Py_CLEANUP_SUPPORTED; + } + + if (!mapping) { + if (!convert_objects_to_ints(view, aligner->alphabet, argument)) return 0; + return Py_CLEANUP_SUPPORTED; + } + + PyErr_SetString(PyExc_ValueError, "sequence has unexpected format"); + return 0; +} + +static int +strand_converter(PyObject* argument, void* pointer) +{ + if (!PyUnicode_Check(argument)) goto error; + 
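+    /* A valid strand is a str of length one holding an ASCII '+' or '-';
+     * anything else falls through to the ValueError set below. */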
if (PyUnicode_READY(argument) == -1) return 0; + if (PyUnicode_GET_LENGTH(argument) == 1) { + const Py_UCS4 ch = PyUnicode_READ_CHAR(argument, 0); + if (ch < 128) { + const char c = ch; + if (ch == '+' || ch == '-') { + *((char*)pointer) = c; + return 1; + } + } + } +error: + PyErr_SetString(PyExc_ValueError, "strand must be '+' or '-'"); + return 0; +} + +static const char Aligner_score__doc__[] = "calculates the alignment score"; + +static PyObject* +Aligner_score(Aligner* self, PyObject* args, PyObject* keywords) +{ + const int* sA; + const int* sB; + Py_ssize_t nA; + Py_ssize_t nB; + Py_buffer bA = {0}; + Py_buffer bB = {0}; + const Mode mode = self->mode; + const Algorithm algorithm = _get_algorithm(self); + char strand = '+'; + PyObject* result = NULL; + PyObject* substitution_matrix = self->substitution_matrix.obj; + + static char *kwlist[] = {"sequenceA", "sequenceB", "strand", NULL}; + + bA.obj = (PyObject*)self; + bB.obj = (PyObject*)self; + if(!PyArg_ParseTupleAndKeywords(args, keywords, "O&O&O&", kwlist, + sequence_converter, &bA, + sequence_converter, &bB, + strand_converter, &strand)) + return NULL; + + sA = bA.buf; + nA = bA.len / bA.itemsize; + sB = bB.buf; + nB = bB.len / bB.itemsize; + + switch (algorithm) { + case NeedlemanWunschSmithWaterman: + switch (mode) { + case Global: + if (substitution_matrix) + result = Aligner_needlemanwunsch_score_matrix(self, sA, nA, sB, nB, strand); + else + result = Aligner_needlemanwunsch_score_compare(self, sA, nA, sB, nB, strand); + break; + case Local: + if (substitution_matrix) + result = Aligner_smithwaterman_score_matrix(self, sA, nA, sB, nB); + else + result = Aligner_smithwaterman_score_compare(self, sA, nA, sB, nB); + break; + } + break; + case Gotoh: + switch (mode) { + case Global: + if (substitution_matrix) + result = Aligner_gotoh_global_score_matrix(self, sA, nA, sB, nB, strand); + else + result = Aligner_gotoh_global_score_compare(self, sA, nA, sB, nB, strand); + break; + case Local: + if (substitution_matrix) + result = Aligner_gotoh_local_score_matrix(self, sA, nA, sB, nB); + else + result = Aligner_gotoh_local_score_compare(self, sA, nA, sB, nB); + break; + } + break; + case WatermanSmithBeyer: + switch (mode) { + case Global: + if (substitution_matrix) + result = Aligner_watermansmithbeyer_global_score_matrix(self, sA, nA, sB, nB, strand); + else + result = Aligner_watermansmithbeyer_global_score_compare(self, sA, nA, sB, nB, strand); + break; + case Local: + if (substitution_matrix) + result = Aligner_watermansmithbeyer_local_score_matrix(self, sA, nA, sB, nB, strand); + else + result = Aligner_watermansmithbeyer_local_score_compare(self, sA, nA, sB, nB, strand); + break; + } + break; + case Unknown: + default: + PyErr_SetString(PyExc_RuntimeError, "unknown algorithm"); + break; + } + + sequence_converter(NULL, &bA); + sequence_converter(NULL, &bB); + + return result; +} + +static const char Aligner_align__doc__[] = "align two sequences"; + +static PyObject* +Aligner_align(Aligner* self, PyObject* args, PyObject* keywords) +{ + const int* sA; + const int* sB; + Py_ssize_t nA; + Py_ssize_t nB; + Py_buffer bA = {0}; + Py_buffer bB = {0}; + const Mode mode = self->mode; + const Algorithm algorithm = _get_algorithm(self); + char strand = '+'; + PyObject* result = NULL; + PyObject* substitution_matrix = self->substitution_matrix.obj; + + static char *kwlist[] = {"sequenceA", "sequenceB", "strand", NULL}; + + bA.obj = (PyObject*)self; + bB.obj = (PyObject*)self; + if(!PyArg_ParseTupleAndKeywords(args, keywords, "O&O&O&", 
kwlist, + sequence_converter, &bA, + sequence_converter, &bB, + strand_converter, &strand)) + return NULL; + + sA = bA.buf; + nA = bA.len / bA.itemsize; + sB = bB.buf; + nB = bB.len / bB.itemsize; + + switch (algorithm) { + case NeedlemanWunschSmithWaterman: + switch (mode) { + case Global: + if (substitution_matrix) + result = Aligner_needlemanwunsch_align_matrix(self, sA, nA, sB, nB, strand); + else + result = Aligner_needlemanwunsch_align_compare(self, sA, nA, sB, nB, strand); + break; + case Local: + if (substitution_matrix) + result = Aligner_smithwaterman_align_matrix(self, sA, nA, sB, nB, strand); + else + result = Aligner_smithwaterman_align_compare(self, sA, nA, sB, nB, strand); + break; + } + break; + case Gotoh: + switch (mode) { + case Global: + if (substitution_matrix) + result = Aligner_gotoh_global_align_matrix(self, sA, nA, sB, nB, strand); + else + result = Aligner_gotoh_global_align_compare(self, sA, nA, sB, nB, strand); + break; + case Local: + if (substitution_matrix) + result = Aligner_gotoh_local_align_matrix(self, sA, nA, sB, nB, strand); + else + result = Aligner_gotoh_local_align_compare(self, sA, nA, sB, nB, strand); + break; + } + break; + case WatermanSmithBeyer: + switch (mode) { + case Global: + if (substitution_matrix) + result = Aligner_watermansmithbeyer_global_align_matrix(self, sA, nA, sB, nB, strand); + else + result = Aligner_watermansmithbeyer_global_align_compare(self, sA, nA, sB, nB, strand); + break; + case Local: + if (substitution_matrix) + result = Aligner_watermansmithbeyer_local_align_matrix(self, sA, nA, sB, nB, strand); + else + result = Aligner_watermansmithbeyer_local_align_compare(self, sA, nA, sB, nB, strand); + break; + } + break; + case Unknown: + default: + PyErr_SetString(PyExc_RuntimeError, "unknown algorithm"); + break; + } + + sequence_converter(NULL, &bA); + sequence_converter(NULL, &bB); + + return result; +} + +static char Aligner_doc[] = +"Aligner.\n"; + +static PyMethodDef Aligner_methods[] = { + {"score", + (PyCFunction)Aligner_score, + METH_VARARGS | METH_KEYWORDS, + Aligner_score__doc__ + }, + {"align", + (PyCFunction)Aligner_align, + METH_VARARGS | METH_KEYWORDS, + Aligner_align__doc__ + }, + {NULL} /* Sentinel */ +}; + +static PyTypeObject AlignerType = { + PyVarObject_HEAD_INIT(NULL, 0) + "_algorithms.PairwiseAligner", /* tp_name */ + sizeof(Aligner), /* tp_basicsize */ + 0, /* tp_itemsize */ + (destructor)Aligner_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + (reprfunc)Aligner_repr, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + (reprfunc)Aligner_str, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ + Aligner_doc, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + Aligner_methods, /* tp_methods */ + 0, /* tp_members */ + Aligner_getset, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)Aligner_init, /* tp_init */ +}; + + +/* Module definition */ + +static char _aligners__doc__[] = +"C extension module implementing pairwise alignment algorithms"; + +static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, + "_aligners", + _aligners__doc__, + -1, + NULL, + NULL, + NULL, + NULL, + NULL +}; 
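+
+/* Module initialization below follows the standard CPython extension
+ * pattern: finalize the type objects with PyType_Ready, create the module
+ * from the PyModuleDef above, and publish the aligner type under the name
+ * "PairwiseAligner".  A minimal usage sketch from the Python side (the
+ * sequences are arbitrary example values; the strand must be '+' or '-'):
+ *
+ *     from _aligners import PairwiseAligner
+ *     aligner = PairwiseAligner()
+ *     score = aligner.score("ACGT", "ACGG", "+")
+ *     score, paths = aligner.align("ACGT", "ACGG", "+")
+ *
+ * score() returns a float, while align() returns the optimal score paired
+ * with a path generator over the optimal alignments.  Note that
+ * PyModule_AddObject steals a reference only on success, hence the
+ * Py_INCREF/Py_DECREF pairing in the function below. */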
+
+PyObject *
+PyInit__aligners(void)
+{
+    PyObject* module;
+    AlignerType.tp_new = PyType_GenericNew;
+
+    if (PyType_Ready(&AlignerType) < 0 || PyType_Ready(&PathGenerator_Type) < 0)
+        return NULL;
+
+    module = PyModule_Create(&moduledef);
+    if (!module) return NULL;
+
+    Py_INCREF(&AlignerType);
+    /* Reference to AlignerType will be stolen by PyModule_AddObject
+     * only if it is successful. */
+    if (PyModule_AddObject(module,
+                           "PairwiseAligner", (PyObject*) &AlignerType) < 0) {
+        Py_DECREF(&AlignerType);
+        Py_DECREF(module);
+        return NULL;
+    }
+
+    return module;
+}
diff --git a/code/lib/Bio/Align/_aligners.cp37-win_amd64.pyd b/code/lib/Bio/Align/_aligners.cp37-win_amd64.pyd
new file mode 100644
index 0000000..26d918c
Binary files /dev/null and b/code/lib/Bio/Align/_aligners.cp37-win_amd64.pyd differ
diff --git a/code/lib/Bio/Align/substitution_matrices/__init__.py b/code/lib/Bio/Align/substitution_matrices/__init__.py
new file mode 100644
index 0000000..5d49ac6
--- /dev/null
+++ b/code/lib/Bio/Align/substitution_matrices/__init__.py
@@ -0,0 +1,514 @@
+# Copyright 2019 by Michiel de Hoon.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+
+"""Substitution matrices."""
+
+import os
+import string
+import numpy
+
+
+class Array(numpy.ndarray):
+    """numpy array subclass indexed by integers and by letters."""
+
+    def __new__(cls, alphabet=None, dims=None, data=None, dtype=float):
+        """Create a new Array instance."""
+        if isinstance(data, dict):
+            if alphabet is not None:
+                raise ValueError("alphabet should be None if data is a dict")
+            if dims is not None:
+                raise ValueError("dims should be None if data is a dict")
+            alphabet = []
+            single_letters = True
+            for key in data:
+                if isinstance(key, str):
+                    if dims is None:
+                        dims = 1
+                    elif dims != 1:
+                        raise ValueError("inconsistent dimensions in data")
+                    alphabet.append(key)
+                elif isinstance(key, tuple):
+                    if dims is None:
+                        dims = len(key)
+                    elif dims != len(key):
+                        raise ValueError("inconsistent dimensions in data")
+                    if dims == 1:
+                        if not isinstance(key, str):
+                            raise ValueError("expected string")
+                        if len(key) > 1:
+                            single_letters = False
+                        alphabet.append(key)
+                    elif dims == 2:
+                        for letter in key:
+                            if not isinstance(letter, str):
+                                raise ValueError("expected string")
+                            if len(letter) > 1:
+                                single_letters = False
+                            alphabet.append(letter)
+                    else:
+                        raise ValueError(
+                            "data array should be 1- or 2- dimensional "
+                            "(found %d dimensions) in key" % dims
+                        )
+            alphabet = sorted(set(alphabet))
+            if single_letters:
+                alphabet = "".join(alphabet)
+            else:
+                alphabet = tuple(alphabet)
+            n = len(alphabet)
+            if dims == 1:
+                shape = (n,)
+            elif dims == 2:
+                shape = (n, n)
+            else:  # dims is None
+                raise ValueError("data is an empty dictionary")
+            obj = super().__new__(cls, shape, dtype)
+            if dims == 1:
+                for i, letter in enumerate(alphabet):
+                    obj[i] = data.get(letter, 0.0)
+            elif dims == 2:
+                for i1, letter1 in enumerate(alphabet):
+                    for i2, letter2 in enumerate(alphabet):
+                        key = (letter1, letter2)
+                        value = data.get(key, 0.0)
+                        obj[i1, i2] = value
+            obj._alphabet = alphabet
+            return obj
+        if alphabet is None:
+            alphabet = string.ascii_uppercase
+        elif not (isinstance(alphabet, str) or isinstance(alphabet, tuple)):
+            raise ValueError("alphabet should be a string or a tuple")
+        n = len(alphabet)
+        if data is None:
+            if dims is None:
+                dims = 1
+            elif dims not
in (1, 2): + raise ValueError("dims should be 1 or 2 (found %s)" % dims) + shape = (n,) * dims + else: + if dims is None: + shape = data.shape + dims = len(shape) + if dims == 1: + pass + elif dims == 2: + if shape[0] != shape[1]: + raise ValueError("data array is not square") + else: + raise ValueError( + "data array should be 1- or 2- dimensional " + "(found %d dimensions) " % dims + ) + else: + shape = (n,) * dims + if data.shape != shape: + raise ValueError( + "data shape has inconsistent shape (expected (%s), found (%s))" + % (shape, data.shape) + ) + obj = super().__new__(cls, shape, dtype) + if data is None: + obj[:] = 0.0 + else: + obj[:] = data + obj._alphabet = alphabet + return obj + + def __array_finalize__(self, obj): + if obj is None: + return + self._alphabet = getattr(obj, "_alphabet", None) + + def _convert_key(self, key): + if isinstance(key, tuple): + indices = [] + for index in key: + if isinstance(index, str): + try: + index = self._alphabet.index(index) + except ValueError: + raise IndexError("'%s'" % index) from None + indices.append(index) + key = tuple(indices) + elif isinstance(key, str): + try: + key = self._alphabet.index(key) + except ValueError: + raise IndexError("'%s'" % key) from None + return key + + def __getitem__(self, key): + key = self._convert_key(key) + value = numpy.ndarray.__getitem__(self, key) + if value.ndim == 2: + if self.ndim == 2: + if value.shape != self.shape: + raise IndexError("Requesting truncated array") + elif self.ndim == 1: + length = self.shape[0] + if value.shape[0] == length and value.shape[1] == 1: + pass + elif value.shape[0] == 1 and value.shape[1] == length: + pass + else: + raise IndexError("Requesting truncated array") + elif value.ndim == 1: + if value.shape[0] != self.shape[0]: + value._alphabet = self.alphabet[key] + return value.view(Array) + + def __setitem__(self, key, value): + key = self._convert_key(key) + numpy.ndarray.__setitem__(self, key, value) + + def __contains__(self, key): + # Follow dict definition of __contains__ + return key in self.keys() + + def __array_prepare__(self, out_arr, context=None): + # needed for numpy older than 1.13.0 + ufunc, inputs, i = context + alphabet = self.alphabet + for arg in inputs: + if isinstance(arg, Array): + if arg.alphabet != alphabet: + raise ValueError("alphabets are inconsistent") + return numpy.ndarray.__array_prepare__(self, out_arr, context) + + def __array_wrap__(self, out_arr, context=None): + if len(out_arr) == 1: + return out_arr[0] + return numpy.ndarray.__array_wrap__(self, out_arr, context) + + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + args = [] + alphabet = self._alphabet + for arg in inputs: + if isinstance(arg, Array): + if arg.alphabet != alphabet: + raise ValueError("alphabets are inconsistent") + args.append(arg.view(numpy.ndarray)) + else: + args.append(arg) + + outputs = kwargs.pop("out", None) + if outputs: + out_args = [] + for arg in outputs: + if isinstance(arg, Array): + if arg.alphabet != alphabet: + raise ValueError("alphabets are inconsistent") + out_args.append(arg.view(numpy.ndarray)) + else: + out_args.append(arg) + kwargs["out"] = tuple(out_args) + else: + outputs = (None,) * ufunc.nout + + raw_results = super().__array_ufunc__(ufunc, method, *args, **kwargs) + if raw_results is NotImplemented: + return NotImplemented + + if method == "at": + return + + if ufunc.nout == 1: + raw_results = (raw_results,) + + results = [] + for raw_result, output in zip(raw_results, outputs): + if raw_result.ndim == 0: + result = 
raw_result + elif output is None: + result = numpy.asarray(raw_result).view(Array) + result._alphabet = self._alphabet + else: + result = output + result._alphabet = self._alphabet + results.append(result) + + return results[0] if len(results) == 1 else results + + def __reduce__(self): + import pickle + + values = numpy.array(self) + state = pickle.dumps(values) + alphabet = self._alphabet + dims = len(self.shape) + dtype = self.dtype + arguments = (Array, alphabet, dims, None, dtype) + return (Array.__new__, arguments, state) + + def __setstate__(self, state): + import pickle + + self[:, :] = pickle.loads(state) + + def transpose(self, axes=None): + """Transpose the array.""" + other = numpy.ndarray.transpose(self, axes) + other._alphabet = self._alphabet + return other + + @property + def alphabet(self): + """Return the alphabet property.""" + return self._alphabet + + def copy(self): + """Create and return a copy of the array.""" + other = Array(alphabet=self._alphabet, data=self) + return other + + def get(self, key, value=None): + """Return the value of the key if found; return value otherwise.""" + try: + return self[key] + except IndexError: + return value + + def items(self): + """Return an iterator of (key, value) pairs in the array.""" + dims = len(self.shape) + if dims == 1: + for index, key in enumerate(self._alphabet): + value = numpy.ndarray.__getitem__(self, index) + yield key, value + elif dims == 2: + for i1, c1 in enumerate(self._alphabet): + for i2, c2 in enumerate(self._alphabet): + key = (c1, c2) + value = numpy.ndarray.__getitem__(self, (i1, i2)) + yield key, value + else: + raise RuntimeError("array has unexpected shape %s" % self.shape) + + def keys(self): + """Return a tuple with the keys associated with the array.""" + dims = len(self.shape) + alphabet = self._alphabet + if dims == 1: + return tuple(alphabet) + elif dims == 2: + return tuple((c1, c2) for c2 in alphabet for c1 in alphabet) + else: + raise RuntimeError("array has unexpected shape %s" % self.shape) + + def values(self): + """Return a tuple with the values stored in the array.""" + dims = len(self.shape) + alphabet = self._alphabet + if dims == 1: + return tuple(self) + elif dims == 2: + n1, n2 = self.shape + return tuple( + numpy.ndarray.__getitem__(self, (i1, i2)) + for i2 in range(n2) + for i1 in range(n1) + ) + else: + raise RuntimeError("array has unexpected shape %s" % self.shape) + + def update(self, E=None, **F): + """Update the array from dict/iterable E and F.""" + if E is not None: + try: + alphabet = E.keys() + except AttributeError: + for key, value in E: + self[key] = value + else: + for key in E: + self[key] = E[key] + for key in F: + self[key] = F[key] + + def select(self, alphabet): + """Subset the array by selecting the letters from the specified alphabet.""" + ii = [] + jj = [] + for i, key in enumerate(alphabet): + try: + j = self._alphabet.index(key) + except ValueError: + continue + ii.append(i) + jj.append(j) + dims = len(self.shape) + a = Array(alphabet, dims=dims) + ii = numpy.ix_(*[ii] * dims) + jj = numpy.ix_(*[jj] * dims) + a[ii] = numpy.ndarray.__getitem__(self, jj) + return a + + def _format_1D(self, fmt): + _alphabet = self._alphabet + n = len(_alphabet) + words = [None] * n + lines = [] + try: + header = self.header + except AttributeError: + pass + else: + for line in header: + line = "# %s\n" % line + lines.append(line) + maxwidth = 0 + for i, key in enumerate(_alphabet): + value = self[key] + word = fmt % value + width = len(word) + if width > maxwidth: + maxwidth = 
width + words[i] = word + fmt2 = " %" + str(maxwidth) + "s" + for letter, word in zip(_alphabet, words): + word = fmt2 % word + line = letter + word + "\n" + lines.append(line) + text = "".join(lines) + return text + + def _format_2D(self, fmt): + alphabet = self.alphabet + n = len(alphabet) + words = [[None] * n for _ in range(n)] + lines = [] + try: + header = self.header + except AttributeError: + pass + else: + for line in header: + line = "# %s\n" % line + lines.append(line) + width = max(len(c) for c in alphabet) + line = " " * width + for j, c2 in enumerate(alphabet): + maxwidth = 0 + for i, c1 in enumerate(alphabet): + key = (c1, c2) + value = self[key] + word = fmt % value + width = len(word) + if width > maxwidth: + maxwidth = width + words[i][j] = word + fmt2 = " %" + str(maxwidth) + "s" + word = fmt2 % c2 + line += word + for i, c1 in enumerate(alphabet): + word = words[i][j] + words[i][j] = fmt2 % word + line = line.rstrip() + "\n" + lines.append(line) + for letter, row in zip(alphabet, words): + line = letter + "".join(row) + "\n" + lines.append(line) + text = "".join(lines) + return text + + def __format__(self, fmt): + return self.format(fmt) + + def format(self, fmt=""): + """Return a string representation of the array. + + The argument ``fmt`` specifies the number format to be used. + By default, the number format is "%i" if the array contains integer + numbers, and "%.1f" otherwise. + + """ + if fmt == "": + if numpy.issubdtype(self.dtype, numpy.integer): + fmt = "%i" + else: + fmt = "%.1f" + n = len(self.shape) + if n == 1: + return self._format_1D(fmt) + elif n == 2: + return self._format_2D(fmt) + else: + raise RuntimeError("Array has unexpected rank %d" % n) + + def __str__(self): + return self.format() + + def __repr__(self): + text = numpy.ndarray.__repr__(self) + alphabet = self._alphabet + if isinstance(alphabet, str): + assert text.endswith(")") + text = text[:-1] + ",\n alphabet='%s')" % self._alphabet + return text + + +def read(handle, dtype=float): + """Parse the file and return an Array object.""" + try: + fp = open(handle) + lines = fp.readlines() + except TypeError: + fp = handle + try: + lines = fp.readlines() + except Exception as e: + raise e from None + finally: + fp.close() + header = [] + for i, line in enumerate(lines): + if not line.startswith("#"): + break + header.append(line[1:].strip()) + rows = [line.split() for line in lines[i:]] + if len(rows[0]) == len(rows[1]) == 2: + alphabet = [key for key, value in rows] + for key in alphabet: + if len(key) > 1: + alphabet = tuple(alphabet) + break + else: + alphabet = "".join(alphabet) + matrix = Array(alphabet=alphabet, dims=1, dtype=dtype) + matrix.update(rows) + else: + alphabet = rows.pop(0) + for key in alphabet: + if len(key) > 1: + alphabet = tuple(alphabet) + break + else: + alphabet = "".join(alphabet) + matrix = Array(alphabet=alphabet, dims=2, dtype=dtype) + for letter1, row in zip(alphabet, rows): + assert letter1 == row.pop(0) + for letter2, word in zip(alphabet, row): + matrix[letter1, letter2] = float(word) + matrix.header = header + return matrix + + +def load(name=None): + """Load and return a precalculated substitution matrix. 
+ + >>> from Bio.Align import substitution_matrices + >>> names = substitution_matrices.load() + """ + path = os.path.realpath(__file__) + directory = os.path.dirname(path) + subdirectory = os.path.join(directory, "data") + if name is None: + filenames = os.listdir(subdirectory) + return sorted(filenames) + path = os.path.join(subdirectory, name) + matrix = read(path) + return matrix diff --git a/code/lib/Bio/Align/substitution_matrices/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/Align/substitution_matrices/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..23f023b Binary files /dev/null and b/code/lib/Bio/Align/substitution_matrices/__pycache__/__init__.cpython-37.pyc differ diff --git a/code/lib/Bio/Align/substitution_matrices/data/BENNER22 b/code/lib/Bio/Align/substitution_matrices/data/BENNER22 new file mode 100644 index 0000000..49ba457 --- /dev/null +++ b/code/lib/Bio/Align/substitution_matrices/data/BENNER22 @@ -0,0 +1,27 @@ +# S.A. Benner, M.A. Cohen, G.H. Gonnet: +# "Amino acid substitution during functionally constrained divergent evolution +# of protein sequences". +# Protein Engineering 7(11): 1323-1332 (1994). +# Figure 3B. +# PMID 7700864 + A C D E F G H I K L M N P Q R S T V W Y +A 2.5 -1.2 -0.2 -0.3 -3.1 0.8 -1.6 -0.4 -1.0 -1.7 -0.8 0.0 0.8 -0.9 -1.2 1.3 1.4 0.4 -5.5 -3.5 +C -1.2 12.6 -3.7 -4.3 -0.1 -1.7 -1.5 -2.4 -3.3 -2.6 -2.5 -1.9 -3.1 -3.3 -1.6 0.3 -1.1 -1.7 0.5 0.6 +D -0.2 -3.7 4.8 3.9 -5.4 0.7 0.3 -4.0 0.2 -4.9 -3.9 2.4 -1.8 0.6 -1.0 0.1 -0.7 -3.0 -6.4 -3.0 +E -0.3 -4.3 3.9 4.6 -5.7 0.5 -0.2 -3.6 1.0 -4.4 -3.4 1.2 -1.7 1.7 -0.1 -0.5 -0.9 -2.7 -6.3 -4.0 +F -3.1 -0.1 -5.4 -5.7 7.7 -5.8 0.3 0.5 -5.1 2.2 0.7 -3.5 -3.4 -3.6 -4.3 -2.2 -2.6 -0.1 0.5 5.9 +G 0.8 -1.7 0.7 0.5 -5.8 6.2 -2.0 -3.8 -1.0 -4.9 -3.8 0.4 -1.8 -1.4 -0.7 0.6 -0.7 -2.5 -4.5 -4.8 +H -1.6 -1.5 0.3 -0.2 0.3 -2.0 6.1 -3.2 0.8 -2.1 -2.4 1.4 -0.4 2.4 1.5 -0.5 -1.1 -3.0 -2.7 3.7 +I -0.4 -2.4 -4.0 -3.6 0.5 -3.8 -3.2 4.2 -3.0 2.7 3.1 -2.7 -2.3 -2.7 -3.2 -1.4 0.3 3.6 -4.4 -2.2 +K -1.0 -3.3 0.2 1.0 -5.1 -1.0 0.8 -3.0 4.4 -3.3 -2.0 1.0 -1.6 2.2 3.9 -0.4 -0.4 -2.7 -3.7 -3.6 +L -1.7 -2.6 -4.9 -4.4 2.2 -4.9 -2.1 2.7 -3.3 4.6 3.2 -3.5 -1.3 -2.0 -2.9 -2.1 -1.0 2.0 -1.8 -0.7 +M -0.8 -2.5 -3.9 -3.4 0.7 -3.8 -2.4 3.1 -2.0 3.2 4.9 -2.6 -2.0 -1.7 -2.1 -1.5 0.1 2.5 -2.8 -1.8 +N 0.0 -1.9 2.4 1.2 -3.5 0.4 1.4 -2.7 1.0 -3.5 -2.6 3.3 -1.1 0.5 0.4 1.1 0.5 -2.3 -5.2 -1.2 +P 0.8 -3.1 -1.8 -1.7 -3.4 -1.8 -0.4 -2.3 -1.6 -1.3 -2.0 -1.1 7.0 -0.1 -1.2 1.1 0.4 -1.7 -5.8 -3.5 +Q -0.9 -3.3 0.6 1.7 -3.6 -1.4 2.4 -2.7 2.2 -2.0 -1.7 0.5 -0.1 4.2 2.2 -0.6 -0.7 -2.4 -3.3 -1.9 +R -1.2 -1.6 -1.0 -0.1 -4.3 -0.7 1.5 -3.2 3.9 -2.9 -2.1 0.4 -1.2 2.2 5.0 -0.5 -0.7 -2.9 -1.1 -2.7 +S 1.3 0.3 0.1 -0.5 -2.2 0.6 -0.5 -1.4 -0.4 -2.1 -1.5 1.1 1.1 -0.6 -0.5 2.0 1.5 -0.9 -3.9 -1.9 +T 1.4 -1.1 -0.7 -0.9 -2.6 -0.7 -1.1 0.3 -0.4 -1.0 0.1 0.5 0.4 -0.7 -0.7 1.5 2.5 0.4 -4.5 -3.0 +V 0.4 -1.7 -3.0 -2.7 -0.1 -2.5 -3.0 3.6 -2.7 2.0 2.5 -2.3 -1.7 -2.4 -2.9 -0.9 0.4 3.7 -4.5 -2.6 +W -5.5 0.5 -6.4 -6.3 0.5 -4.5 -2.7 -4.4 -3.7 -1.8 -2.8 -5.2 -5.8 -3.3 -1.1 -3.9 -4.5 -4.5 15.7 1.5 +Y -3.5 0.6 -3.0 -4.0 5.9 -4.8 3.7 -2.2 -3.6 -0.7 -1.8 -1.2 -3.5 -1.9 -2.7 -1.9 -3.0 -2.6 1.5 9.0 diff --git a/code/lib/Bio/Align/substitution_matrices/data/BENNER6 b/code/lib/Bio/Align/substitution_matrices/data/BENNER6 new file mode 100644 index 0000000..4849b30 --- /dev/null +++ b/code/lib/Bio/Align/substitution_matrices/data/BENNER6 @@ -0,0 +1,27 @@ +# S.A. Benner, M.A. Cohen, G.H. 
Gonnet: +# "Amino acid substitution during functionally constrained divergent evolution +# of protein sequences". +# Protein Engineering 7(11): 1323-1332 (1994). +# Figure 3A. +# PMID 7700864 + A C D E F G H I K L M N P Q R S T V W Y +A 2.5 -1.7 -0.6 -0.7 -3.2 0.8 -2.1 0.1 -1.9 -1.3 -0.2 0.0 1.1 -1.7 -1.7 1.4 1.7 0.7 -4.3 -4.0 +C -1.7 12.1 -3.7 -4.7 -0.1 -1.3 -1.2 -3.6 -2.8 -3.8 -3.7 -1.6 -2.7 -3.2 -0.4 0.9 -1.5 -3.1 1.6 2.6 +D -0.6 -3.7 5.2 4.4 -5.7 0.8 0.1 -4.2 -0.2 -5.3 -4.3 2.5 -2.8 0.6 -1.5 -0.4 -1.2 -3.3 -6.3 -2.3 +E -0.7 -4.7 4.4 5.2 -6.7 0.5 -0.2 -4.1 0.9 -5.0 -4.1 1.1 -2.6 2.1 -0.4 -1.2 -1.6 -3.0 -5.6 -4.1 +F -3.2 -0.1 -5.7 -6.7 8.3 -5.7 0.1 0.0 -6.3 2.4 -0.1 -3.5 -3.2 -4.4 -4.9 -1.8 -2.4 -0.5 -1.6 5.6 +G 0.8 -1.3 0.8 0.5 -5.7 5.8 -2.1 -3.4 -1.4 -4.6 -3.7 -0.1 -1.7 -1.6 -0.1 0.8 -0.5 -2.3 -1.7 -4.9 +H -2.1 -1.2 0.1 -0.2 0.1 -2.1 6.1 -3.7 0.9 -2.2 -3.4 1.4 -0.4 3.2 1.8 -0.9 -1.7 -3.8 -2.8 4.4 +I 0.1 -3.6 -4.2 -4.1 0.0 -3.4 -3.7 4.4 -3.8 2.4 4.0 -2.5 -2.0 -3.8 -3.8 -1.2 0.7 3.9 -5.0 -3.3 +K -1.9 -2.8 -0.2 0.9 -6.3 -1.4 0.9 -3.8 5.6 -4.1 -2.9 1.0 -2.3 2.5 4.3 -1.2 -1.1 -3.8 -1.4 -4.0 +L -1.3 -3.8 -5.3 -5.0 2.4 -4.6 -2.2 2.4 -4.1 4.8 -2.9 -3.4 -0.2 -2.4 -3.2 -1.5 -0.4 1.9 -3.0 -1.6 +M -0.2 -3.7 -4.3 -4.1 -0.1 -3.7 -3.4 4.0 -2.9 -2.9 4.8 -2.5 -1.8 -3.1 -3.0 -1.3 0.6 3.3 -4.4 -3.6 +N 0.0 -1.6 2.5 1.1 -3.5 -0.1 1.4 -2.5 1.0 -3.4 -2.5 3.6 -1.1 0.1 -0.1 1.2 0.5 -2.4 -4.4 -0.9 +P 1.1 -2.7 -2.8 -2.6 -3.2 -1.7 -0.4 -2.0 -2.3 -0.2 -1.8 -1.1 6.5 0.1 -1.3 1.4 0.6 -1.6 -4.8 -3.8 +Q -1.7 -3.2 0.6 2.1 -4.4 -1.6 3.2 -3.8 2.5 -2.4 -3.1 0.1 0.1 5.3 2.5 -1.4 -1.7 -3.5 -2.6 -1.4 +R -1.7 -0.4 -1.5 -0.4 -4.9 -0.1 1.8 -3.8 4.3 -3.2 -3.0 -0.1 -1.3 2.5 5.1 -0.9 -1.3 -3.7 2.0 -2.6 +S 1.4 0.9 -0.4 -1.2 -1.8 0.8 -0.9 -1.2 -1.2 -1.5 -1.3 1.2 1.4 -1.4 -0.9 2.1 1.5 -0.9 -2.9 -1.8 +T 1.7 -1.5 -1.2 -1.6 -2.4 -0.5 -1.7 0.7 -1.1 -0.4 0.6 0.5 0.6 -1.7 -1.3 1.5 2.4 0.6 -2.6 -3.4 +V 0.7 -3.1 -3.3 -3.0 -0.5 -2.3 -3.8 3.9 -3.8 1.9 3.3 -2.4 -1.6 -3.5 -3.7 -0.9 0.6 4.0 -4.8 -3.8 +W -4.3 1.6 -6.3 -5.6 -1.6 -1.7 -2.8 -5.0 -1.4 -3.0 -4.4 -4.4 -4.8 -2.6 2.0 -2.9 -2.6 -4.8 14.7 -0.3 +Y -4.0 2.6 -2.3 -4.1 5.6 -4.9 4.4 -3.3 -4.0 -1.6 -3.6 -0.9 -3.8 -1.4 -2.6 -1.8 -3.4 -3.8 -0.3 9.5 diff --git a/code/lib/Bio/Align/substitution_matrices/data/BENNER74 b/code/lib/Bio/Align/substitution_matrices/data/BENNER74 new file mode 100644 index 0000000..62000b1 --- /dev/null +++ b/code/lib/Bio/Align/substitution_matrices/data/BENNER74 @@ -0,0 +1,27 @@ +# S.A. Benner, M.A. Cohen, G.H. Gonnet: +# "Amino acid substitution during functionally constrained divergent evolution +# of protein sequences". +# Protein Engineering 7(11): 1323-1332 (1994). +# Figure 3C. 
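The `Array` class defined in `__init__.py` above doubles as a mapping keyed by letters (single letters for `dims=1`, letter pairs for `dims=2`) through its `get`/`keys`/`items`/`update` methods. A minimal sketch of that interface, assuming the vendored `code/lib` directory is on `sys.path`:

```python
from Bio.Align.substitution_matrices import Array

# A fresh Array is zero-filled and indexed by letters of its alphabet.
counts = Array("ACGT", dims=2)
counts["A", "G"] += 1.0
print(counts["A", "G"])             # 1.0

# keys() enumerates letter pairs for a 2D array.
print(sorted(counts.keys())[:2])    # [('A', 'A'), ('A', 'C')]

# get() returns its default instead of raising for letters outside
# the alphabet (lookup failures surface as IndexError internally).
print(counts.get(("A", "X"), 0.0))  # 0.0

# update() accepts a dict or an iterable of (key, value) pairs.
counts.update({("C", "T"): 2.0})
print(counts["C", "T"])             # 2.0
```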
+# PMID 7700864 + A C D E F G H I K L M N P Q R S T V W Y +A 2.4 0.3 -0.3 -0.1 -2.6 0.6 -1.0 -0.8 -0.4 -1.4 -0.8 -0.2 0.4 -0.3 -0.8 1.1 0.7 0.1 -4.1 -2.6 +C 0.3 11.8 -3.2 -3.2 -0.7 -2.0 -1.3 -1.2 -2.9 -1.6 -1.2 -1.8 -3.1 -2.6 -2.2 0.1 -0.6 -0.2 -0.9 -0.4 +D -0.3 -3.2 4.8 2.9 -4.7 0.2 0.4 -3.9 0.4 -4.2 -3.2 2.2 -1.0 0.8 -0.5 0.4 -0.2 -2.9 -5.5 -2.8 +E -0.1 -3.2 2.9 3.7 -4.3 -0.5 0.2 -2.9 1.2 -3.1 -2.2 1.0 -0.7 1.7 0.3 0.1 -0.2 -2.1 -4.7 -3.0 +F -2.6 -0.7 -4.7 -4.3 7.2 -5.4 0.0 0.9 -3.6 2.1 1.3 -3.2 -3.8 -2.8 -3.5 -2.6 -2.2 0.1 3.0 5.3 +G 0.6 -2.0 0.2 -0.5 -5.4 6.6 -1.6 -4.3 -1.1 -4.6 -3.5 0.4 -1.7 -1.1 -1.0 0.4 -1.0 -3.1 -4.1 -4.3 +H -1.0 -1.3 0.4 0.2 0.0 -1.6 6.1 -2.3 0.6 -1.9 -1.5 1.2 -1.0 1.4 1.0 -0.3 -0.5 -2.1 -1.0 2.5 +I -0.8 -1.2 -3.9 -2.9 0.9 -4.3 -2.3 4.0 -2.3 2.8 2.6 -2.8 -2.6 -2.0 -2.6 -1.8 -0.3 3.2 -2.3 -1.0 +K -0.4 -2.9 0.4 1.2 -3.6 -1.1 0.6 -2.3 3.4 -2.4 -1.5 0.9 -0.8 1.7 2.9 0.0 0.1 -1.9 -3.6 -2.4 +L -1.4 -1.6 -4.2 -3.1 2.1 -4.6 -1.9 2.8 -2.4 4.2 2.9 -3.1 -2.2 -1.7 -2.4 -2.2 -1.1 1.9 -0.9 -0.1 +M -0.8 -1.2 -3.2 -2.2 1.3 -3.5 -1.5 2.6 -1.5 2.9 4.5 -2.2 -2.4 -1.0 -1.8 -1.4 -0.4 1.8 -1.3 -0.5 +N -0.2 -1.8 2.2 1.0 -3.2 0.4 1.2 -2.8 0.9 -3.1 -2.2 3.6 -1.0 0.7 0.3 0.9 0.4 -2.2 -4.0 -1.4 +P 0.4 -3.1 -1.0 -0.7 -3.8 -1.7 -1.0 -2.6 -0.8 -2.2 -2.4 -1.0 7.5 -0.2 -0.1 0.5 0.1 -1.9 -5.2 -3.4 +Q -0.3 -2.6 0.8 1.7 -2.8 -1.1 1.4 -2.0 1.7 -1.7 -1.0 0.7 -0.2 3.0 1.6 0.1 -0.1 -1.7 -2.8 -1.8 +R -0.8 -2.2 -0.5 0.3 -3.5 -1.0 1.0 -2.6 2.9 -2.4 -1.8 0.3 -0.1 1.6 4.8 -0.2 -0.3 -2.2 -1.6 -2.0 +S 1.1 0.1 0.4 0.1 -2.6 0.4 -0.3 -1.8 0.0 -2.2 -1.4 0.9 0.5 0.1 -0.2 2.1 1.4 -1.0 -3.4 -1.9 +T 0.7 -0.6 -0.2 -0.2 -2.2 -1.0 -0.5 -0.3 0.1 -1.1 -0.4 0.4 0.1 -0.1 -0.3 1.4 2.5 0.2 -3.7 -2.1 +V 0.1 -0.2 -2.9 -2.1 0.1 -3.1 -2.1 3.2 -1.9 1.9 1.8 -2.2 -1.9 -1.7 -2.2 -1.0 0.2 3.4 -2.9 -1.4 +W -4.1 -0.9 -5.5 -4.7 3.0 -4.1 -1.0 -2.3 -3.6 -0.9 -1.3 -4.0 -5.2 -2.8 -1.6 -3.4 -3.7 -2.9 14.7 3.6 +Y -2.6 -0.4 -2.8 -3.0 5.3 -4.3 2.5 -1.0 -2.4 -0.1 -0.5 -1.4 -3.4 -1.8 -2.0 -1.9 -2.1 -1.4 3.6 8.1 diff --git a/code/lib/Bio/Align/substitution_matrices/data/BLOSUM45 b/code/lib/Bio/Align/substitution_matrices/data/BLOSUM45 new file mode 100644 index 0000000..18c3323 --- /dev/null +++ b/code/lib/Bio/Align/substitution_matrices/data/BLOSUM45 @@ -0,0 +1,31 @@ +# Matrix made by matblas from blosum45.iij +# * column uses minimum score +# BLOSUM Clustered Scoring Matrix in 1/3 Bit Units +# Blocks Database = /data/blocks_5.0/blocks.dat +# Cluster Percentage: >= 45 +# Entropy = 0.3795, Expected = -0.2789 + A R N D C Q E G H I L K M F P S T W Y V B Z X * +A 5 -2 -1 -2 -1 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -2 -2 0 -1 -1 0 -5 +R -2 7 0 -1 -3 1 0 -2 0 -3 -2 3 -1 -2 -2 -1 -1 -2 -1 -2 -1 0 -1 -5 +N -1 0 6 2 -2 0 0 0 1 -2 -3 0 -2 -2 -2 1 0 -4 -2 -3 4 0 -1 -5 +D -2 -1 2 7 -3 0 2 -1 0 -4 -3 0 -3 -4 -1 0 -1 -4 -2 -3 5 1 -1 -5 +C -1 -3 -2 -3 12 -3 -3 -3 -3 -3 -2 -3 -2 -2 -4 -1 -1 -5 -3 -1 -2 -3 -2 -5 +Q -1 1 0 0 -3 6 2 -2 1 -2 -2 1 0 -4 -1 0 -1 -2 -1 -3 0 4 -1 -5 +E -1 0 0 2 -3 2 6 -2 0 -3 -2 1 -2 -3 0 0 -1 -3 -2 -3 1 4 -1 -5 +G 0 -2 0 -1 -3 -2 -2 7 -2 -4 -3 -2 -2 -3 -2 0 -2 -2 -3 -3 -1 -2 -1 -5 +H -2 0 1 0 -3 1 0 -2 10 -3 -2 -1 0 -2 -2 -1 -2 -3 2 -3 0 0 -1 -5 +I -1 -3 -2 -4 -3 -2 -3 -4 -3 5 2 -3 2 0 -2 -2 -1 -2 0 3 -3 -3 -1 -5 +L -1 -2 -3 -3 -2 -2 -2 -3 -2 2 5 -3 2 1 -3 -3 -1 -2 0 1 -3 -2 -1 -5 +K -1 3 0 0 -3 1 1 -2 -1 -3 -3 5 -1 -3 -1 -1 -1 -2 -1 -2 0 1 -1 -5 +M -1 -1 -2 -3 -2 0 -2 -2 0 2 2 -1 6 0 -2 -2 -1 -2 0 1 -2 -1 -1 -5 +F -2 -2 -2 -4 -2 -4 -3 -3 -2 0 1 -3 0 8 -3 -2 -1 1 3 0 -3 -3 -1 -5 +P -1 -2 -2 -1 -4 -1 0 -2 -2 -2 -3 -1 -2 -3 9 -1 -1 -3 -3 -3 -2 
-1 -1 -5 +S 1 -1 1 0 -1 0 0 0 -1 -2 -3 -1 -2 -2 -1 4 2 -4 -2 -1 0 0 0 -5 +T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -1 -1 2 5 -3 -1 0 0 -1 0 -5 +W -2 -2 -4 -4 -5 -2 -3 -2 -3 -2 -2 -2 -2 1 -3 -4 -3 15 3 -3 -4 -2 -2 -5 +Y -2 -1 -2 -2 -3 -1 -2 -3 2 0 0 -1 0 3 -3 -2 -1 3 8 -1 -2 -2 -1 -5 +V 0 -2 -3 -3 -1 -3 -3 -3 -3 3 1 -2 1 0 -3 -1 0 -3 -1 5 -3 -3 -1 -5 +B -1 -1 4 5 -2 0 1 -1 0 -3 -3 0 -2 -3 -2 0 0 -4 -2 -3 4 2 -1 -5 +Z -1 0 0 1 -3 4 4 -2 0 -3 -2 1 -1 -3 -1 0 -1 -2 -2 -3 2 4 -1 -5 +X 0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 0 0 -2 -1 -1 -1 -1 -1 -5 +* -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 1 diff --git a/code/lib/Bio/Align/substitution_matrices/data/BLOSUM50 b/code/lib/Bio/Align/substitution_matrices/data/BLOSUM50 new file mode 100644 index 0000000..3f62e3c --- /dev/null +++ b/code/lib/Bio/Align/substitution_matrices/data/BLOSUM50 @@ -0,0 +1,31 @@ +# Matrix made by matblas from blosum50.iij +# * column uses minimum score +# BLOSUM Clustered Scoring Matrix in 1/3 Bit Units +# Blocks Database = /data/blocks_5.0/blocks.dat +# Cluster Percentage: >= 50 +# Entropy = 0.4808, Expected = -0.3573 + A R N D C Q E G H I L K M F P S T W Y V B Z X * +A 5 -2 -1 -2 -1 -1 -1 0 -2 -1 -2 -1 -1 -3 -1 1 0 -3 -2 0 -2 -1 -1 -5 +R -2 7 -1 -2 -4 1 0 -3 0 -4 -3 3 -2 -3 -3 -1 -1 -3 -1 -3 -1 0 -1 -5 +N -1 -1 7 2 -2 0 0 0 1 -3 -4 0 -2 -4 -2 1 0 -4 -2 -3 4 0 -1 -5 +D -2 -2 2 8 -4 0 2 -1 -1 -4 -4 -1 -4 -5 -1 0 -1 -5 -3 -4 5 1 -1 -5 +C -1 -4 -2 -4 13 -3 -3 -3 -3 -2 -2 -3 -2 -2 -4 -1 -1 -5 -3 -1 -3 -3 -2 -5 +Q -1 1 0 0 -3 7 2 -2 1 -3 -2 2 0 -4 -1 0 -1 -1 -1 -3 0 4 -1 -5 +E -1 0 0 2 -3 2 6 -3 0 -4 -3 1 -2 -3 -1 -1 -1 -3 -2 -3 1 5 -1 -5 +G 0 -3 0 -1 -3 -2 -3 8 -2 -4 -4 -2 -3 -4 -2 0 -2 -3 -3 -4 -1 -2 -2 -5 +H -2 0 1 -1 -3 1 0 -2 10 -4 -3 0 -1 -1 -2 -1 -2 -3 2 -4 0 0 -1 -5 +I -1 -4 -3 -4 -2 -3 -4 -4 -4 5 2 -3 2 0 -3 -3 -1 -3 -1 4 -4 -3 -1 -5 +L -2 -3 -4 -4 -2 -2 -3 -4 -3 2 5 -3 3 1 -4 -3 -1 -2 -1 1 -4 -3 -1 -5 +K -1 3 0 -1 -3 2 1 -2 0 -3 -3 6 -2 -4 -1 0 -1 -3 -2 -3 0 1 -1 -5 +M -1 -2 -2 -4 -2 0 -2 -3 -1 2 3 -2 7 0 -3 -2 -1 -1 0 1 -3 -1 -1 -5 +F -3 -3 -4 -5 -2 -4 -3 -4 -1 0 1 -4 0 8 -4 -3 -2 1 4 -1 -4 -4 -2 -5 +P -1 -3 -2 -1 -4 -1 -1 -2 -2 -3 -4 -1 -3 -4 10 -1 -1 -4 -3 -3 -2 -1 -2 -5 +S 1 -1 1 0 -1 0 -1 0 -1 -3 -3 0 -2 -3 -1 5 2 -4 -2 -2 0 0 -1 -5 +T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 2 5 -3 -2 0 0 -1 0 -5 +W -3 -3 -4 -5 -5 -1 -3 -3 -3 -3 -2 -3 -1 1 -4 -4 -3 15 2 -3 -5 -2 -3 -5 +Y -2 -1 -2 -3 -3 -1 -2 -3 2 -1 -1 -2 0 4 -3 -2 -2 2 8 -1 -3 -2 -1 -5 +V 0 -3 -3 -4 -1 -3 -3 -4 -4 4 1 -3 1 -1 -3 -2 0 -3 -1 5 -4 -3 -1 -5 +B -2 -1 4 5 -3 0 1 -1 0 -4 -4 0 -3 -4 -2 0 0 -5 -3 -4 5 2 -1 -5 +Z -1 0 0 1 -3 4 5 -2 0 -3 -3 1 -1 -4 -1 0 -1 -2 -2 -3 2 5 -1 -5 +X -1 -1 -1 -1 -2 -1 -1 -2 -1 -1 -1 -1 -1 -2 -2 -1 0 -3 -1 -1 -1 -1 -1 -5 +* -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 1 diff --git a/code/lib/Bio/Align/substitution_matrices/data/BLOSUM62 b/code/lib/Bio/Align/substitution_matrices/data/BLOSUM62 new file mode 100644 index 0000000..205f139 --- /dev/null +++ b/code/lib/Bio/Align/substitution_matrices/data/BLOSUM62 @@ -0,0 +1,31 @@ +# Matrix made by matblas from blosum62.iij +# * column uses minimum score +# BLOSUM Clustered Scoring Matrix in 1/2 Bit Units +# Blocks Database = /data/blocks_5.0/blocks.dat +# Cluster Percentage: >= 62 +# Entropy = 0.6979, Expected = -0.5209 + A R N D C Q E G H I L K M F P S T W Y V B Z X * +A 4 -1 -2 -2 0 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -3 -2 0 -2 -1 0 -4 +R -1 5 0 -2 -3 1 0 -2 0 -3 -2 2 -1 -3 -2 -1 -1 -3 -2 -3 -1 0 -1 -4 +N -2 0 6 1 -3 0 0 0 1 -3 
-3 0 -2 -3 -2 1 0 -4 -2 -3 3 0 -1 -4 +D -2 -2 1 6 -3 0 2 -1 -1 -3 -4 -1 -3 -3 -1 0 -1 -4 -3 -3 4 1 -1 -4 +C 0 -3 -3 -3 9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 -3 -3 -2 -4 +Q -1 1 0 0 -3 5 2 -2 0 -3 -2 1 0 -3 -1 0 -1 -2 -1 -2 0 3 -1 -4 +E -1 0 0 2 -4 2 5 -2 0 -3 -3 1 -2 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4 +G 0 -2 0 -1 -3 -2 -2 6 -2 -4 -4 -2 -3 -3 -2 0 -2 -2 -3 -3 -1 -2 -1 -4 +H -2 0 1 -1 -3 0 0 -2 8 -3 -3 -1 -2 -1 -2 -1 -2 -2 2 -3 0 0 -1 -4 +I -1 -3 -3 -3 -1 -3 -3 -4 -3 4 2 -3 1 0 -3 -2 -1 -3 -1 3 -3 -3 -1 -4 +L -1 -2 -3 -4 -1 -2 -3 -4 -3 2 4 -2 2 0 -3 -2 -1 -2 -1 1 -4 -3 -1 -4 +K -1 2 0 -1 -3 1 1 -2 -1 -3 -2 5 -1 -3 -1 0 -1 -3 -2 -2 0 1 -1 -4 +M -1 -1 -2 -3 -1 0 -2 -3 -2 1 2 -1 5 0 -2 -1 -1 -1 -1 1 -3 -1 -1 -4 +F -2 -3 -3 -3 -2 -3 -3 -3 -1 0 0 -3 0 6 -4 -2 -2 1 3 -1 -3 -3 -1 -4 +P -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4 7 -1 -1 -4 -3 -2 -2 -1 -2 -4 +S 1 -1 1 0 -1 0 0 0 -1 -2 -2 0 -1 -2 -1 4 1 -3 -2 -2 0 0 0 -4 +T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 1 5 -2 -2 0 -1 -1 0 -4 +W -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1 1 -4 -3 -2 11 2 -3 -4 -3 -2 -4 +Y -2 -2 -2 -3 -2 -1 -2 -3 2 -1 -1 -2 -1 3 -3 -2 -2 2 7 -1 -3 -2 -1 -4 +V 0 -3 -3 -3 -1 -2 -2 -3 -3 3 1 -2 1 -1 -2 -2 0 -3 -1 4 -3 -2 -1 -4 +B -2 -1 3 4 -3 0 1 -1 0 -3 -4 0 -3 -3 -2 0 -1 -4 -3 -3 4 1 -1 -4 +Z -1 0 0 1 -3 3 4 -2 0 -3 -3 1 -1 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4 +X 0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 0 0 -2 -1 -1 -1 -1 -1 -4 +* -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 1 diff --git a/code/lib/Bio/Align/substitution_matrices/data/BLOSUM80 b/code/lib/Bio/Align/substitution_matrices/data/BLOSUM80 new file mode 100644 index 0000000..78172a3 --- /dev/null +++ b/code/lib/Bio/Align/substitution_matrices/data/BLOSUM80 @@ -0,0 +1,31 @@ +# Matrix made by matblas from blosum80_3.iij +# * column uses minimum score +# BLOSUM Clustered Scoring Matrix in 1/3 Bit Units +# Blocks Database = /data/blocks_5.0/blocks.dat +# Cluster Percentage: >= 80 +# Entropy = 0.9868, Expected = -0.7442 + A R N D C Q E G H I L K M F P S T W Y V B Z X * +A 7 -3 -3 -3 -1 -2 -2 0 -3 -3 -3 -1 -2 -4 -1 2 0 -5 -4 -1 -3 -2 -1 -8 +R -3 9 -1 -3 -6 1 -1 -4 0 -5 -4 3 -3 -5 -3 -2 -2 -5 -4 -4 -2 0 -2 -8 +N -3 -1 9 2 -5 0 -1 -1 1 -6 -6 0 -4 -6 -4 1 0 -7 -4 -5 5 -1 -2 -8 +D -3 -3 2 10 -7 -1 2 -3 -2 -7 -7 -2 -6 -6 -3 -1 -2 -8 -6 -6 6 1 -3 -8 +C -1 -6 -5 -7 13 -5 -7 -6 -7 -2 -3 -6 -3 -4 -6 -2 -2 -5 -5 -2 -6 -7 -4 -8 +Q -2 1 0 -1 -5 9 3 -4 1 -5 -4 2 -1 -5 -3 -1 -1 -4 -3 -4 -1 5 -2 -8 +E -2 -1 -1 2 -7 3 8 -4 0 -6 -6 1 -4 -6 -2 -1 -2 -6 -5 -4 1 6 -2 -8 +G 0 -4 -1 -3 -6 -4 -4 9 -4 -7 -7 -3 -5 -6 -5 -1 -3 -6 -6 -6 -2 -4 -3 -8 +H -3 0 1 -2 -7 1 0 -4 12 -6 -5 -1 -4 -2 -4 -2 -3 -4 3 -5 -1 0 -2 -8 +I -3 -5 -6 -7 -2 -5 -6 -7 -6 7 2 -5 2 -1 -5 -4 -2 -5 -3 4 -6 -6 -2 -8 +L -3 -4 -6 -7 -3 -4 -6 -7 -5 2 6 -4 3 0 -5 -4 -3 -4 -2 1 -7 -5 -2 -8 +K -1 3 0 -2 -6 2 1 -3 -1 -5 -4 8 -3 -5 -2 -1 -1 -6 -4 -4 -1 1 -2 -8 +M -2 -3 -4 -6 -3 -1 -4 -5 -4 2 3 -3 9 0 -4 -3 -1 -3 -3 1 -5 -3 -2 -8 +F -4 -5 -6 -6 -4 -5 -6 -6 -2 -1 0 -5 0 10 -6 -4 -4 0 4 -2 -6 -6 -3 -8 +P -1 -3 -4 -3 -6 -3 -2 -5 -4 -5 -5 -2 -4 -6 12 -2 -3 -7 -6 -4 -4 -2 -3 -8 +S 2 -2 1 -1 -2 -1 -1 -1 -2 -4 -4 -1 -3 -4 -2 7 2 -6 -3 -3 0 -1 -1 -8 +T 0 -2 0 -2 -2 -1 -2 -3 -3 -2 -3 -1 -1 -4 -3 2 8 -5 -3 0 -1 -2 -1 -8 +W -5 -5 -7 -8 -5 -4 -6 -6 -4 -5 -4 -6 -3 0 -7 -6 -5 16 3 -5 -8 -5 -5 -8 +Y -4 -4 -4 -6 -5 -3 -5 -6 3 -3 -2 -4 -3 4 -6 -3 -3 3 11 -3 -5 -4 -3 -8 +V -1 -4 -5 -6 -2 -4 -4 -6 -5 4 1 -4 1 -2 -4 -3 0 -5 -3 7 -6 -4 -2 -8 +B -3 -2 5 6 -6 -1 1 -2 -1 -6 -7 -1 -5 -6 -4 0 -1 -8 -5 -6 6 0 -3 -8 +Z -2 0 -1 1 -7 5 6 -4 0 -6 
-5 1 -3 -6 -2 -1 -2 -5 -4 -4 0 6 -1 -8 +X -1 -2 -2 -3 -4 -2 -2 -3 -2 -2 -2 -2 -2 -3 -3 -1 -1 -5 -3 -2 -3 -1 -2 -8 +* -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 1 diff --git a/code/lib/Bio/Align/substitution_matrices/data/BLOSUM90 b/code/lib/Bio/Align/substitution_matrices/data/BLOSUM90 new file mode 100644 index 0000000..71441b5 --- /dev/null +++ b/code/lib/Bio/Align/substitution_matrices/data/BLOSUM90 @@ -0,0 +1,31 @@ +# Matrix made by matblas from blosum90.iij +# * column uses minimum score +# BLOSUM Clustered Scoring Matrix in 1/2 Bit Units +# Blocks Database = /data/blocks_5.0/blocks.dat +# Cluster Percentage: >= 90 +# Entropy = 1.1806, Expected = -0.8887 + A R N D C Q E G H I L K M F P S T W Y V B Z X * +A 5 -2 -2 -3 -1 -1 -1 0 -2 -2 -2 -1 -2 -3 -1 1 0 -4 -3 -1 -2 -1 -1 -6 +R -2 6 -1 -3 -5 1 -1 -3 0 -4 -3 2 -2 -4 -3 -1 -2 -4 -3 -3 -2 0 -2 -6 +N -2 -1 7 1 -4 0 -1 -1 0 -4 -4 0 -3 -4 -3 0 0 -5 -3 -4 4 -1 -2 -6 +D -3 -3 1 7 -5 -1 1 -2 -2 -5 -5 -1 -4 -5 -3 -1 -2 -6 -4 -5 4 0 -2 -6 +C -1 -5 -4 -5 9 -4 -6 -4 -5 -2 -2 -4 -2 -3 -4 -2 -2 -4 -4 -2 -4 -5 -3 -6 +Q -1 1 0 -1 -4 7 2 -3 1 -4 -3 1 0 -4 -2 -1 -1 -3 -3 -3 -1 4 -1 -6 +E -1 -1 -1 1 -6 2 6 -3 -1 -4 -4 0 -3 -5 -2 -1 -1 -5 -4 -3 0 4 -2 -6 +G 0 -3 -1 -2 -4 -3 -3 6 -3 -5 -5 -2 -4 -5 -3 -1 -3 -4 -5 -5 -2 -3 -2 -6 +H -2 0 0 -2 -5 1 -1 -3 8 -4 -4 -1 -3 -2 -3 -2 -2 -3 1 -4 -1 0 -2 -6 +I -2 -4 -4 -5 -2 -4 -4 -5 -4 5 1 -4 1 -1 -4 -3 -1 -4 -2 3 -5 -4 -2 -6 +L -2 -3 -4 -5 -2 -3 -4 -5 -4 1 5 -3 2 0 -4 -3 -2 -3 -2 0 -5 -4 -2 -6 +K -1 2 0 -1 -4 1 0 -2 -1 -4 -3 6 -2 -4 -2 -1 -1 -5 -3 -3 -1 1 -1 -6 +M -2 -2 -3 -4 -2 0 -3 -4 -3 1 2 -2 7 -1 -3 -2 -1 -2 -2 0 -4 -2 -1 -6 +F -3 -4 -4 -5 -3 -4 -5 -5 -2 -1 0 -4 -1 7 -4 -3 -3 0 3 -2 -4 -4 -2 -6 +P -1 -3 -3 -3 -4 -2 -2 -3 -3 -4 -4 -2 -3 -4 8 -2 -2 -5 -4 -3 -3 -2 -2 -6 +S 1 -1 0 -1 -2 -1 -1 -1 -2 -3 -3 -1 -2 -3 -2 5 1 -4 -3 -2 0 -1 -1 -6 +T 0 -2 0 -2 -2 -1 -1 -3 -2 -1 -2 -1 -1 -3 -2 1 6 -4 -2 -1 -1 -1 -1 -6 +W -4 -4 -5 -6 -4 -3 -5 -4 -3 -4 -3 -5 -2 0 -5 -4 -4 11 2 -3 -6 -4 -3 -6 +Y -3 -3 -3 -4 -4 -3 -4 -5 1 -2 -2 -3 -2 3 -4 -3 -2 2 8 -3 -4 -3 -2 -6 +V -1 -3 -4 -5 -2 -3 -3 -5 -4 3 0 -3 0 -2 -3 -2 -1 -3 -3 5 -4 -3 -2 -6 +B -2 -2 4 4 -4 -1 0 -2 -1 -5 -5 -1 -4 -4 -3 0 -1 -6 -4 -4 4 0 -2 -6 +Z -1 0 -1 0 -5 4 4 -3 0 -4 -4 1 -2 -4 -2 -1 -1 -4 -3 -3 0 4 -1 -6 +X -1 -2 -2 -2 -3 -1 -2 -2 -2 -2 -2 -1 -1 -2 -2 -1 -1 -3 -2 -2 -2 -1 -2 -6 +* -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 1 diff --git a/code/lib/Bio/Align/substitution_matrices/data/DAYHOFF b/code/lib/Bio/Align/substitution_matrices/data/DAYHOFF new file mode 100644 index 0000000..e8aecac --- /dev/null +++ b/code/lib/Bio/Align/substitution_matrices/data/DAYHOFF @@ -0,0 +1,27 @@ +# M.O. Dayhoff, R.M. Schwartz, and B.C. Orcutt: +# "A Model of Evolutionary Change in Proteins." +# Margaret O. Dayhoff: Atlas of Protein Sequence and Structure, +# Volume 5, Supplement 3, 1978, pages 345-352. +# The National Biomedical Research Foundation, 1979. +# Figure 84, page 352. 
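The `read()` function added earlier parses plain-text files like the one above: leading `#` lines are collected into `matrix.header`, the first non-comment row gives the alphabet, and each remaining row is a letter followed by one score per column. A short sketch, with the file path assumed to match this capsule's layout:

```python
import sys
sys.path.insert(0, "code/lib")  # assumed location of the vendored Bio package

from Bio.Align.substitution_matrices import read

dayhoff = read("code/lib/Bio/Align/substitution_matrices/data/DAYHOFF")
print(dayhoff.header[0])   # M.O. Dayhoff, R.M. Schwartz, and B.C. Orcutt:
print(dayhoff.alphabet)    # ACDEFGHIKLMNPQRSTVWY
print(dayhoff["W", "W"])   # 1.7
```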
+ A C D E F G H I K L M N P Q R S T V W Y +A 0.2 -0.2 0.0 0.0 -0.4 0.1 -0.1 -0.1 -0.1 -0.2 -0.1 0.0 0.1 0.0 -0.2 0.1 0.1 0.0 -0.6 -0.3 +C -0.2 1.2 -0.5 -0.5 -0.4 -0.3 -0.3 -0.2 -0.5 -0.6 -0.5 -0.4 -0.3 -0.5 -0.4 0.0 -0.2 -0.2 -0.8 0.0 +D 0.0 -0.5 0.4 0.3 -0.6 0.1 0.1 -0.2 0.0 -0.4 -0.3 0.2 -0.1 0.2 -0.1 0.0 0.0 -0.2 -0.7 -0.4 +E 0.0 -0.5 0.3 0.4 -0.5 0.0 0.1 -0.2 0.0 -0.3 -0.2 0.1 -0.1 0.2 -0.1 0.0 0.0 -0.2 -0.7 -0.4 +F -0.4 -0.4 -0.6 -0.5 0.9 -0.5 -0.2 0.1 -0.5 0.2 0.0 -0.4 -0.5 -0.5 -0.4 -0.3 -0.3 -0.1 0.0 0.7 +G 0.1 -0.3 0.1 0.0 -0.5 0.5 -0.2 -0.3 -0.2 -0.4 -0.3 0.0 -0.1 -0.1 -0.3 0.1 0.0 -0.1 -0.7 -0.5 +H -0.1 -0.3 0.1 0.1 -0.2 -0.2 0.6 -0.2 0.0 -0.2 -0.2 0.2 0.0 0.3 0.2 -0.1 -0.1 -0.2 -0.3 0.0 +I -0.1 -0.2 -0.2 -0.2 0.1 -0.3 -0.2 0.5 -0.2 0.2 0.2 -0.2 -0.2 -0.2 -0.2 -0.1 0.0 0.4 -0.5 -0.1 +K -0.1 -0.5 0.0 0.0 -0.5 -0.2 0.0 -0.2 0.5 -0.3 0.0 0.1 -0.1 0.1 0.3 0.0 0.0 -0.2 -0.3 -0.4 +L -0.2 -0.6 -0.4 -0.3 0.2 -0.4 -0.2 0.2 -0.3 0.6 0.4 -0.3 -0.3 -0.2 -0.3 -0.3 -0.2 0.2 -0.2 -0.1 +M -0.1 -0.5 -0.3 -0.2 0.0 -0.3 -0.2 0.2 0.0 0.4 0.6 -0.2 -0.2 -0.1 0.0 -0.2 -0.1 0.2 -0.4 -0.2 +N 0.0 -0.4 0.2 0.1 -0.4 0.0 0.2 -0.2 0.1 -0.3 -0.2 0.2 -0.1 0.1 0.0 0.1 0.0 -0.2 -0.4 -0.2 +P 0.1 -0.3 -0.1 -0.1 -0.5 -0.1 0.0 -0.2 -0.1 -0.3 -0.2 -0.1 0.6 0.0 0.0 0.1 0.0 -0.1 -0.6 -0.5 +Q 0.0 -0.5 0.2 0.2 -0.5 -0.1 0.3 -0.2 0.1 -0.2 -0.1 0.1 0.0 0.4 0.1 -0.1 -0.1 -0.2 -0.5 -0.4 +R -0.2 -0.4 -0.1 -0.1 -0.4 -0.3 0.2 -0.2 0.3 -0.3 0.0 0.0 0.0 0.1 0.6 0.0 -0.1 -0.2 0.2 -0.4 +S 0.1 0.0 0.0 0.0 -0.3 0.1 -0.1 -0.1 0.0 -0.3 -0.2 0.1 0.1 -0.1 0.0 0.2 0.1 -0.1 -0.2 -0.3 +T 0.1 -0.2 0.0 0.0 -0.3 0.0 -0.1 0.0 0.0 -0.2 -0.1 0.0 0.0 -0.1 -0.1 0.1 0.3 0.0 -0.5 -0.3 +V 0.0 -0.2 -0.2 -0.2 -0.1 -0.1 -0.2 0.4 -0.2 0.2 0.2 -0.2 -0.1 -0.2 -0.2 -0.1 0.0 0.4 -0.6 -0.2 +W -0.6 -0.8 -0.7 -0.7 0.0 -0.7 -0.3 -0.5 -0.3 -0.2 -0.4 -0.4 -0.6 -0.5 0.2 -0.2 -0.5 -0.6 1.7 0.0 +Y -0.3 0.0 -0.4 -0.4 0.7 -0.5 0.0 -0.1 -0.4 -0.1 -0.2 -0.2 -0.5 -0.4 -0.4 -0.3 -0.3 -0.2 0.0 1.0 diff --git a/code/lib/Bio/Align/substitution_matrices/data/FENG b/code/lib/Bio/Align/substitution_matrices/data/FENG new file mode 100644 index 0000000..ebd5c2d --- /dev/null +++ b/code/lib/Bio/Align/substitution_matrices/data/FENG @@ -0,0 +1,26 @@ +# D.F. Feng, M.S. Johnson, R.F. Doolittle: +# "Aligning amino acid sequences: Comparison of commonly used methods." +# Journal of Molecular Evolution 21(2): 112-125 (1985). +# Table 1, upper triangle. 
+# PMID 6100188 + A C D E F G H I K L M N P Q R S T V W Y +A 6 2 4 4 2 5 2 2 3 2 2 3 5 3 2 5 5 5 2 2 +C 2 6 1 0 3 3 2 2 0 2 2 2 2 1 2 4 2 2 3 3 +D 4 1 6 5 1 4 3 1 3 1 0 5 2 4 2 3 2 3 0 2 +E 4 0 5 6 0 4 2 1 4 1 1 3 3 4 2 3 3 4 1 1 +F 2 3 1 0 6 1 2 4 0 4 2 1 2 1 1 3 1 4 3 5 +G 5 3 4 4 1 6 1 2 2 2 1 3 3 2 3 5 2 4 3 2 +H 2 2 3 2 2 1 6 1 3 3 1 4 3 4 4 3 2 1 1 3 +I 2 2 1 1 4 2 1 6 2 5 4 2 2 1 2 2 3 5 2 3 +K 3 0 3 4 0 2 3 2 6 2 2 4 2 4 5 3 4 3 1 1 +L 2 2 1 1 4 2 3 5 2 6 5 1 3 2 2 2 2 5 4 3 +M 2 2 0 1 2 1 1 4 2 5 6 1 2 2 2 1 3 4 3 2 +N 3 2 5 3 1 3 4 2 4 1 1 6 2 3 2 5 4 2 0 3 +P 5 2 2 3 2 3 3 2 2 3 2 2 6 3 3 4 4 3 2 2 +Q 3 1 4 4 1 2 4 1 4 2 2 3 3 6 3 3 3 2 1 2 +R 2 2 2 2 1 3 4 2 5 2 2 2 3 3 6 3 3 2 2 1 +S 5 4 3 3 3 5 3 2 3 2 1 5 4 3 3 6 5 2 2 3 +T 5 2 2 3 1 2 2 3 4 2 3 4 4 3 3 5 6 3 1 2 +V 5 2 3 4 4 4 1 5 3 5 4 2 3 2 2 2 3 6 3 3 +W 2 3 0 1 3 3 1 2 1 4 3 0 2 1 2 2 1 3 6 3 +Y 2 3 2 1 5 2 3 3 1 3 2 3 2 2 1 3 2 3 3 6 diff --git a/code/lib/Bio/Align/substitution_matrices/data/GENETIC b/code/lib/Bio/Align/substitution_matrices/data/GENETIC new file mode 100644 index 0000000..79fc69b --- /dev/null +++ b/code/lib/Bio/Align/substitution_matrices/data/GENETIC @@ -0,0 +1,27 @@ +# S.A. Benner, M.A. Cohen, G.H. Gonnet: +# "Amino acid substitution during functionally constrained divergent evolution +# of protein sequences." +# Figure 5. +# Protein Engineering 7(11): 1323-1332 (1994). +# PMID 7700864 + A C D E F G H I K L M N P Q R S T V W Y +A 4.0 -1.9 1.0 1.3 -2.4 1.2 -2.1 -1.8 -1.9 -2.3 -2.0 -1.7 0.8 -2.1 -1.6 0.1 0.9 1.0 -2.2 -2.4 +C -1.9 5.5 -1.6 -3.0 1.8 1.0 -1.6 -1.9 -3.2 -1.3 -2.7 -1.5 -1.9 -3.1 0.7 1.5 -1.9 -2.2 4.1 2.6 +D 1.0 -1.6 4.8 3.8 -1.7 1.1 1.7 -2.1 0.3 -2.4 -2.5 1.7 -2.2 0.3 -2.3 -2.1 -2.1 1.0 -2.9 2.3 +E 1.3 -3.0 3.8 5.7 -2.9 1.4 0.3 -2.3 2.0 -2.5 -1.8 0.3 -2.1 2.0 -2.0 -2.8 -2.1 1.3 -3.2 -0.9 +F -2.4 1.8 -1.7 -2.9 4.5 -1.9 -1.1 1.3 -2.8 2.2 0.5 -1.3 -1.8 -2.1 -1.5 0.0 -2.1 1.0 0.0 2.0 +G 1.2 1.0 1.1 1.4 -1.9 4.2 -2.2 -2.5 -2.2 -2.2 -2.3 -2.6 -1.8 -2.1 0.8 -0.6 -2.1 1.1 1.4 -1.8 +H -2.1 -1.6 1.7 0.3 -1.1 -2.2 4.7 -1.8 0.6 -0.1 -1.8 1.8 0.7 3.6 3.6 -1.6 -1.8 -2.1 -2.1 2.3 +I -1.8 -1.9 -2.1 -2.3 1.3 -2.5 -1.8 4.1 0.7 1.2 3.3 0.9 -1.6 -1.9 -1.2 -0.5 0.8 1.0 -2.2 -1.6 +K -1.9 -3.2 0.3 2.0 -2.8 -2.2 0.6 0.7 5.6 -2.0 1.6 3.5 -1.5 2.2 -0.2 -1.5 1.0 -2.1 -3.0 -0.8 +L -2.3 -1.3 -2.4 -2.5 2.2 -2.2 -0.1 1.2 -2.0 3.4 1.5 -2.2 0.0 0.1 -0.4 -1.2 -1.9 1.1 -0.3 -1.6 +M -2.0 -2.7 -2.5 -1.8 0.5 -2.3 -1.8 3.3 1.6 1.5 5.4 0.1 -1.4 -1.2 -0.4 -1.3 0.7 1.0 -2.0 -2.9 +N -1.7 -1.5 1.7 0.3 -1.3 -2.6 1.8 0.9 3.5 -2.2 0.1 4.7 -1.6 0.4 -1.5 -0.3 0.9 -2.2 -3.0 2.5 +P 0.8 -1.9 -2.2 -2.1 -1.8 -1.8 0.7 -1.6 -1.5 0.0 -1.4 -1.6 3.8 1.0 0.3 0.4 1.1 -2.1 -1.6 -2.3 +Q -2.1 -3.1 0.3 2.0 -2.1 -2.1 3.6 -1.9 2.2 0.1 -1.2 0.4 1.0 5.5 0.3 -2.3 -1.7 -2.0 -2.3 -0.8 +R -1.6 0.7 -2.3 -2.0 -1.5 0.8 3.6 -1.2 -0.2 -0.4 -0.4 -1.5 0.3 0.3 2.9 0.3 -0.6 -2.1 1.8 -1.9 +S 0.1 1.5 -2.1 -2.8 0.0 -0.6 -1.6 -0.5 -1.5 -1.2 -1.3 -0.3 0.4 -2.3 0.3 2.6 1.0 -2.2 0.8 0.3 +T 0.9 -1.9 -2.1 -2.1 -2.1 -2.1 -1.8 0.8 1.0 -1.9 0.7 0.9 1.1 -1.7 -0.6 1.0 4.0 -2.2 -2.2 -2.1 +V 1.0 -2.2 1.0 1.3 1.0 1.1 -2.1 1.0 -2.1 1.1 1.0 -2.2 -2.1 -2.0 -2.1 -2.2 -2.2 4.1 -2.1 -2.2 +W -2.2 4.1 -2.9 -3.2 0.0 1.4 -2.1 -2.2 -3.0 -0.3 -2.0 -3.0 -1.6 -2.3 1.8 0.8 -2.2 -2.1 7.5 -0.5 +Y -2.4 2.6 2.3 -0.9 2.0 -1.8 2.3 -1.6 -0.8 -1.6 -2.9 2.5 -2.3 -0.8 -1.9 0.3 -2.1 -2.2 -0.5 6.5 diff --git a/code/lib/Bio/Align/substitution_matrices/data/GONNET1992 b/code/lib/Bio/Align/substitution_matrices/data/GONNET1992 new file mode 100644 index 0000000..ac4e821 --- /dev/null +++ 
b/code/lib/Bio/Align/substitution_matrices/data/GONNET1992 @@ -0,0 +1,26 @@ +# Gaston H. Gonnet, Mark A. Cohen, Steven A. Benner: +# "Exhaustive matching of the entire protein sequence database." +# Science 256(5062): 1443-1445 (1992). +# Figure 2. +# PMID 1604319 + A C D E F G H I K L M N P Q R S T V W Y +A 2.4 0.5 -0.3 0.0 -2.3 0.5 -0.8 -0.8 -0.4 -1.2 -0.7 -0.3 0.3 -0.2 -0.6 1.1 0.6 0.1 -3.6 -2.2 +C 0.5 11.5 -3.2 -3.0 -0.8 -2.0 -1.3 -1.1 -2.8 -1.5 -0.9 -1.8 -3.1 -2.4 -2.2 0.1 -0.5 0.0 -1.0 -0.5 +D -0.3 -3.2 4.7 2.7 -4.5 0.1 0.4 -3.8 0.5 -4.0 -3.0 2.2 -0.7 0.9 -0.3 0.5 0.0 -2.9 -5.2 -2.8 +E 0.0 -3.0 2.7 3.6 -3.9 -0.8 0.4 -2.7 1.2 -2.8 -2.0 0.9 -0.5 1.7 0.4 0.2 -0.1 -1.9 -4.3 -2.7 +F -2.3 -0.8 -4.5 -3.9 7.0 -5.2 -0.1 1.0 -3.3 2.0 1.6 -3.1 -3.8 -2.6 -3.2 -2.8 -2.2 0.1 3.6 5.1 +G 0.5 -2.0 0.1 -0.8 -5.2 6.6 -1.4 -4.5 -1.1 -4.4 -3.5 0.4 -1.6 -1.0 -1.0 0.4 -1.1 -3.3 -4.0 -4.0 +H -0.8 -1.3 0.4 0.4 -0.1 -1.4 6.0 -2.2 0.6 -1.9 -1.3 1.2 -1.1 1.2 0.6 -0.2 -0.3 -2.0 -0.8 2.2 +I -0.8 -1.1 -3.8 -2.7 1.0 -4.5 -2.2 4.0 -2.1 2.8 2.5 -2.8 -2.6 -1.9 -2.4 -1.8 -0.6 3.1 -1.8 -0.7 +K -0.4 -2.8 0.5 1.2 -3.3 -1.1 0.6 -2.1 3.2 -2.1 -1.4 0.8 -0.6 1.5 2.7 0.1 0.1 -1.7 -3.5 -2.1 +L -1.2 -1.5 -4.0 -2.8 2.0 -4.4 -1.9 2.8 -2.1 4.0 2.8 -3.0 -2.3 -1.6 -2.2 -2.1 -1.3 1.8 -0.7 0.0 +M -0.7 -0.9 -3.0 -2.0 1.6 -3.5 -1.3 2.5 -1.4 2.8 4.3 -2.2 -2.4 -1.0 -1.7 -1.4 -0.6 1.6 -1.0 -0.2 +N -0.3 -1.8 2.2 0.9 -3.1 0.4 1.2 -2.8 0.8 -3.0 -2.2 3.8 -0.9 0.7 0.3 0.9 0.5 -2.2 -3.6 -1.4 +P 0.3 -3.1 -0.7 -0.5 -3.8 -1.6 -1.1 -2.6 -0.6 -2.3 -2.4 -0.9 7.6 -0.2 -0.9 0.4 0.1 -1.8 -5.0 -3.1 +Q -0.2 -2.4 0.9 1.7 -2.6 -1.0 1.2 -1.9 1.5 -1.6 -1.0 0.7 -0.2 2.7 1.5 0.2 0.0 -1.5 -2.7 -1.7 +R -0.6 -2.2 -0.3 0.4 -3.2 -1.0 0.6 -2.4 2.7 -2.2 -1.7 0.3 -0.9 1.5 4.7 -0.2 -0.2 -2.0 -1.6 -1.8 +S 1.1 0.1 0.5 0.2 -2.8 0.4 -0.2 -1.8 0.1 -2.1 -1.4 0.9 0.4 0.2 -0.2 2.2 1.5 -1.0 -3.3 -1.9 +T 0.6 -0.5 0.0 -0.1 -2.2 -1.1 -0.3 -0.6 0.1 -1.3 -0.6 0.5 0.1 0.0 -0.2 1.5 2.5 0.0 -3.5 -1.9 +V 0.1 0.0 -2.9 -1.9 0.1 -3.3 -2.0 3.1 -1.7 1.8 1.6 -2.2 -1.8 -1.5 -2.0 -1.0 0.0 3.4 -2.6 -1.1 +W -3.6 -1.0 -5.2 -4.3 3.6 -4.0 -0.8 -1.8 -3.5 -0.7 -1.0 -3.6 -5.0 -2.7 -1.6 -3.3 -3.5 -2.6 14.2 4.1 +Y -2.2 -0.5 -2.8 -2.7 5.1 -4.0 2.2 -0.7 -2.1 0.0 -0.2 -1.4 -3.1 -1.7 -1.8 -1.9 -1.9 -1.1 4.1 7.8 diff --git a/code/lib/Bio/Align/substitution_matrices/data/HOXD70 b/code/lib/Bio/Align/substitution_matrices/data/HOXD70 new file mode 100644 index 0000000..4cbd0f6 --- /dev/null +++ b/code/lib/Bio/Align/substitution_matrices/data/HOXD70 @@ -0,0 +1,9 @@ +# F. Chiaromonte, V.B. Yap, W. Miller: +# "Scoring pairwise genomic sequence alignments" +# Pacific Symposium on Biocomputing 2002: 115-26 (2002). +# PMID 11928468 + A C G T +A 91 -114 -31 -123 +C -114 100 -125 -31 +G -31 -125 100 -114 +T -123 -31 -114 91 diff --git a/code/lib/Bio/Align/substitution_matrices/data/JOHNSON b/code/lib/Bio/Align/substitution_matrices/data/JOHNSON new file mode 100644 index 0000000..7d30964 --- /dev/null +++ b/code/lib/Bio/Align/substitution_matrices/data/JOHNSON @@ -0,0 +1,27 @@ +# Mark S. Johnson and John P. Overington: +# "A structural basis for sequence comparisons. An evaluation of scoring +# methodologies." +# Journal of Molecular Biology 233(4): 716-738 (1993). +# Table 3, upper triangle. 
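`load()` (the last function in `__init__.py` above) resolves a matrix name against this `data/` directory, and `format()`/`str()` render the result back into the same column-aligned layout these files use, defaulting to `"%i"` for integer dtypes and `"%.1f"` otherwise. A quick sketch, assuming the vendored package imports cleanly:

```python
from Bio.Align.substitution_matrices import load

hoxd70 = load("HOXD70")     # the 4x4 DNA scoring matrix added just above
print(hoxd70["A", "T"])     # -123.0 (read() stores every score as float)
print(hoxd70.format("%d"))  # reprint the matrix with whole-number formatting
```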
+# PMID 8411177 + A C D E F G H I K L M N P Q R S T V W Y +A 0.60 -0.34 -0.16 -0.07 -0.32 -0.05 -0.31 -0.22 -0.09 -0.33 -0.15 -0.14 -0.10 -0.06 -0.16 0.00 -0.08 -0.05 -0.58 -0.40 +C -0.34 1.61 -0.97 -0.69 -0.44 -0.82 -0.82 -0.77 -0.87 -0.87 -0.44 -0.76 -0.89 -0.69 -0.56 -0.77 -0.60 -0.48 -0.91 -0.77 +D -0.16 -0.97 0.85 0.24 -0.70 -0.21 -0.07 -0.48 -0.15 -0.80 -0.59 0.26 -0.10 -0.11 -0.34 -0.02 -0.18 -0.52 -0.60 -0.38 +E -0.07 -0.69 0.24 0.86 -0.64 -0.25 -0.23 -0.48 0.11 -0.56 -0.28 -0.07 -0.15 0.24 -0.02 -0.22 -0.05 -0.42 -0.76 -0.37 +F -0.32 -0.44 -0.70 -0.64 1.04 -0.86 -0.17 0.05 -0.56 0.18 -0.06 -0.38 -0.50 -0.64 -0.60 -0.48 -0.50 -0.13 0.34 0.34 +G -0.05 -0.82 -0.21 -0.25 -0.86 0.80 -0.32 -0.55 -0.35 -0.72 -0.52 -0.14 -0.25 -0.28 -0.28 -0.13 -0.38 -0.56 -0.63 -0.54 +H -0.31 -0.82 -0.07 -0.23 -0.17 -0.32 1.27 -0.51 0.01 -0.42 -0.23 0.17 -0.43 0.14 0.01 -0.26 -0.30 -0.39 -0.40 -0.04 +I -0.22 -0.77 -0.48 -0.48 0.05 -0.55 -0.51 0.81 -0.47 0.26 0.26 -0.47 -0.57 -0.70 -0.54 -0.47 -0.32 0.39 -0.33 -0.25 +K -0.09 -0.87 -0.15 0.11 -0.56 -0.35 0.01 -0.47 0.76 -0.34 -0.19 0.01 -0.06 0.11 0.32 -0.15 -0.02 -0.37 -0.54 -0.37 +L -0.33 -0.87 -0.80 -0.56 0.18 -0.72 -0.42 0.26 -0.34 0.73 0.44 -0.48 -0.28 -0.44 -0.37 -0.52 -0.46 0.18 -0.10 -0.24 +M -0.15 -0.44 -0.59 -0.28 -0.06 -0.52 -0.23 0.26 -0.19 0.44 1.12 -0.37 -0.98 -0.06 -0.42 -0.48 -0.32 0.07 -0.09 -0.13 +N -0.14 -0.76 0.26 -0.07 -0.38 -0.14 0.17 -0.47 0.01 -0.48 -0.37 0.80 -0.24 -0.08 -0.15 0.10 0.01 -0.57 -0.61 -0.13 +P -0.10 -0.89 -0.10 -0.15 -0.50 -0.25 -0.43 -0.57 -0.06 -0.28 -0.98 -0.24 1.03 -0.36 -0.36 -0.10 -0.20 -0.52 -0.74 -0.70 +Q -0.06 -0.69 -0.11 0.24 -0.64 -0.28 0.14 -0.70 0.11 -0.44 -0.06 -0.08 -0.36 0.90 0.21 -0.12 -0.04 -0.36 -0.82 -0.51 +R -0.16 -0.56 -0.34 -0.02 -0.60 -0.28 0.01 -0.54 0.32 -0.37 -0.42 -0.15 -0.36 0.21 1.00 -0.06 -0.14 -0.49 -0.38 -0.21 +S 0.00 -0.77 -0.02 -0.22 -0.48 -0.13 -0.26 -0.47 -0.15 -0.52 -0.48 0.10 -0.10 -0.12 -0.06 0.58 0.20 -0.43 -0.62 -0.34 +T -0.08 -0.60 -0.18 -0.05 -0.50 -0.38 -0.30 -0.32 -0.02 -0.46 -0.32 0.01 -0.20 -0.04 -0.14 0.20 0.68 -0.19 -0.93 -0.27 +V -0.05 -0.48 -0.52 -0.42 -0.13 -0.56 -0.39 0.39 -0.37 0.18 0.07 -0.57 -0.52 -0.36 -0.49 -0.43 -0.19 0.70 -0.49 -0.18 +W -0.58 -0.91 -0.60 -0.76 0.34 -0.63 -0.40 -0.33 -0.54 -0.10 -0.09 -0.61 -0.74 -0.82 -0.38 -0.62 -0.93 -0.49 1.52 0.23 +Y -0.40 -0.77 -0.38 -0.37 0.34 -0.54 -0.04 -0.25 -0.37 -0.24 -0.13 -0.13 -0.70 -0.51 -0.21 -0.34 -0.27 -0.18 0.23 1.05 diff --git a/code/lib/Bio/Align/substitution_matrices/data/JONES b/code/lib/Bio/Align/substitution_matrices/data/JONES new file mode 100644 index 0000000..daed995 --- /dev/null +++ b/code/lib/Bio/Align/substitution_matrices/data/JONES @@ -0,0 +1,26 @@ +# David T. Jones, William R. Taylor, Janet M. Thornton: +# "The rapid generation of mutation data matrices from protein sequences." +# Computer Applications in the Biosciences: CABIOS 8(3): 275-282 (1992). +# Table I, lower triangle. 
+# PMID 1633570 + A R N D C Q E G H I L K M F P S T W Y V +A 0.2 -0.1 0.0 0.0 -0.1 -0.1 -0.1 0.1 -0.2 0.0 -0.1 -0.1 -0.1 -0.3 0.1 0.1 0.2 -0.4 -0.3 0.1 +R -0.1 0.5 0.0 -0.1 -0.1 0.2 0.0 0.0 0.2 -0.3 -0.3 0.4 -0.2 -0.4 -0.1 -0.1 -0.1 0.0 -0.2 -0.3 +N 0.0 0.0 0.3 0.2 -0.1 0.0 0.1 0.0 0.1 -0.2 -0.3 0.1 -0.2 -0.3 -0.1 0.1 0.1 -0.5 -0.1 -0.2 +D 0.0 -0.1 0.2 0.5 -0.3 0.1 0.4 0.1 0.0 -0.3 -0.4 0.0 -0.3 -0.5 -0.2 0.0 -0.1 -0.5 -0.2 -0.2 +C -0.1 -0.1 -0.1 -0.3 1.1 -0.3 -0.4 -0.1 0.0 -0.2 -0.3 -0.3 -0.2 0.0 -0.2 0.1 -0.1 0.1 0.2 -0.2 +Q -0.1 0.2 0.0 0.1 -0.3 0.5 0.2 -0.1 0.2 -0.3 -0.2 0.2 -0.2 -0.4 0.0 -0.1 -0.1 -0.3 -0.2 -0.3 +E -0.1 0.0 0.1 0.4 -0.4 0.2 0.5 0.0 0.0 -0.3 -0.4 0.1 -0.3 -0.5 -0.2 -0.1 -0.1 -0.5 -0.4 -0.2 +G 0.1 0.0 0.0 0.1 -0.1 -0.1 0.0 0.5 -0.2 -0.3 -0.4 -0.1 -0.3 -0.5 -0.1 0.1 -0.1 -0.2 -0.4 -0.2 +H -0.2 0.2 0.1 0.0 0.0 0.2 0.0 -0.2 0.6 -0.3 -0.2 0.1 -0.2 0.0 0.0 -0.1 -0.1 -0.3 0.4 -0.3 +I 0.0 -0.3 -0.2 -0.3 -0.2 -0.3 -0.3 -0.3 -0.3 0.4 0.2 -0.3 0.3 0.0 -0.2 -0.1 0.1 -0.4 -0.2 0.4 +L -0.1 -0.3 -0.3 -0.4 -0.3 -0.2 -0.4 -0.4 -0.2 0.2 0.5 -0.3 0.3 0.2 0.0 -0.2 -0.1 -0.2 -0.1 0.2 +K -0.1 0.4 0.1 0.0 -0.3 0.2 0.1 -0.1 0.1 -0.3 -0.3 0.5 -0.2 -0.5 -0.2 -0.1 -0.1 -0.3 -0.3 -0.3 +M -0.1 -0.2 -0.2 -0.3 -0.2 -0.2 -0.3 -0.3 -0.2 0.3 0.3 -0.2 0.6 0.0 -0.2 -0.1 0.0 -0.3 -0.2 0.2 +F -0.3 -0.4 -0.3 -0.5 0.0 -0.4 -0.5 -0.5 0.0 0.0 0.2 -0.5 0.0 0.8 -0.3 -0.2 -0.2 -0.1 0.5 0.0 +P 0.1 -0.1 -0.1 -0.2 -0.2 0.0 -0.2 -0.1 0.0 -0.2 0.0 -0.2 -0.2 -0.3 0.6 0.1 0.1 -0.4 -0.3 -0.1 +S 0.1 -0.1 0.1 0.0 0.1 -0.1 -0.1 0.1 -0.1 -0.1 -0.2 -0.1 -0.1 -0.2 0.1 0.2 0.1 -0.3 -0.1 -0.1 +T 0.2 -0.1 0.1 -0.1 -0.1 -0.1 -0.1 -0.1 -0.1 0.1 -0.1 -0.1 0.0 -0.2 0.1 0.1 0.2 -0.4 -0.3 0.0 +W -0.4 0.0 -0.5 -0.5 0.1 -0.3 -0.5 -0.2 -0.3 -0.4 -0.2 -0.3 -0.3 -0.1 -0.4 -0.3 -0.4 1.5 0.0 -0.3 +Y -0.3 -0.2 -0.1 -0.2 0.2 -0.2 -0.4 -0.4 0.4 -0.2 -0.1 -0.3 -0.2 0.5 -0.3 -0.1 -0.3 0.0 0.9 -0.3 +V 0.1 -0.3 -0.2 -0.2 -0.2 -0.3 -0.2 -0.2 -0.3 0.4 0.2 -0.3 0.2 0.0 -0.1 -0.1 0.0 -0.3 -0.3 0.4 diff --git a/code/lib/Bio/Align/substitution_matrices/data/LEVIN b/code/lib/Bio/Align/substitution_matrices/data/LEVIN new file mode 100644 index 0000000..2f9c8c4 --- /dev/null +++ b/code/lib/Bio/Align/substitution_matrices/data/LEVIN @@ -0,0 +1,27 @@ +# Jonathan M. Levin, Barry Robson, Jean Garnier: +# "An algorithm for secondary structure determination in proteins based on +# sequence similarity." +# FEBS Letters 205(2): 303-308 (1986). +# Figure 1. 
+# PMID 3743779 + A C D E F G H I K L M N P Q R S T V W Y +A 2 0 0 1 -1 0 0 0 0 0 0 0 -1 0 0 1 0 0 -1 -1 +C 0 2 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 -1 +D 0 0 2 1 -1 0 0 -1 0 -1 -1 1 0 0 0 0 0 -1 -1 -1 +E 1 0 1 2 -1 0 0 -1 0 -1 -1 0 -1 1 0 0 0 -1 -1 -1 +F -1 -1 -1 -1 2 -1 -1 1 -1 0 0 -1 -1 -1 -1 -1 -1 0 0 1 +G 0 0 0 0 -1 2 0 -1 0 -1 -1 0 0 0 0 0 0 -1 -1 -1 +H 0 0 0 0 -1 0 2 -1 0 -1 -1 0 0 0 0 0 0 -1 -1 0 +I 0 0 -1 -1 1 -1 -1 2 -1 0 0 -1 -1 -1 -1 -1 0 1 0 0 +K 0 0 0 0 -1 0 0 -1 2 -1 -1 1 0 0 1 0 0 -1 -1 -1 +L 0 0 -1 -1 0 -1 -1 0 -1 2 2 -1 -1 -1 -1 -1 0 1 0 0 +M 0 0 -1 -1 0 -1 -1 0 -1 2 2 -1 -1 -1 -1 -1 0 0 0 0 +N 0 0 1 0 -1 0 0 -1 1 -1 -1 3 0 1 0 0 0 -1 -1 -1 +P -1 0 0 -1 -1 0 0 -1 0 -1 -1 0 3 0 0 0 0 -1 -1 -1 +Q 0 0 0 1 -1 0 0 -1 0 -1 -1 1 0 2 0 0 0 -1 -1 -1 +R 0 0 0 0 -1 0 0 -1 1 -1 -1 0 0 0 2 0 0 -1 0 -1 +S 1 0 0 0 -1 0 0 -1 0 -1 -1 0 0 0 0 2 0 -1 -1 -1 +T 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 2 0 -1 -1 +V 0 0 -1 -1 0 -1 -1 1 -1 1 0 -1 -1 -1 -1 -1 0 2 0 0 +W -1 -1 -1 -1 0 -1 -1 0 -1 0 0 -1 -1 -1 0 -1 -1 0 2 0 +Y -1 -1 -1 -1 1 -1 0 0 -1 0 0 -1 -1 -1 -1 -1 -1 0 0 2 diff --git a/code/lib/Bio/Align/substitution_matrices/data/MCLACHLAN b/code/lib/Bio/Align/substitution_matrices/data/MCLACHLAN new file mode 100644 index 0000000..adf81ce --- /dev/null +++ b/code/lib/Bio/Align/substitution_matrices/data/MCLACHLAN @@ -0,0 +1,27 @@ +# A.D. McLachlan: +# "Tests for comparing related amino-acid sequences. Cytochrome c and +# cytochrome c 551." +# Journal of Molecular Biology 61(2): 409-424 (1971). +# Figure 1. +# PMID 5167087 + A C D E F G H I K L M N P Q R S T V W Y +A 8 1 3 4 1 3 3 2 3 2 3 3 4 3 2 4 3 3 1 1 +C 1 9 1 0 0 1 3 1 0 0 3 1 0 0 1 2 2 1 2 1 +D 3 1 8 5 1 3 4 1 3 1 2 5 3 4 1 3 3 1 0 1 +E 4 0 5 8 0 3 2 1 4 1 1 4 4 5 3 4 4 2 1 2 +F 1 0 1 0 9 0 4 3 0 5 5 0 1 0 1 2 1 3 6 6 +G 3 1 3 3 0 8 2 1 3 1 1 3 3 2 3 3 2 2 1 0 +H 3 3 4 2 4 2 8 2 4 2 3 4 3 4 5 3 4 2 3 4 +I 2 1 1 1 3 1 2 8 1 5 5 1 1 0 1 2 3 5 3 3 +K 3 0 3 4 0 3 4 1 8 2 1 4 3 4 5 3 3 2 1 1 +L 2 0 1 1 5 1 2 5 2 8 6 1 1 3 2 2 3 5 3 3 +M 3 3 2 1 5 1 3 5 1 6 8 2 1 3 1 2 3 4 1 2 +N 3 1 5 4 0 3 4 1 4 1 2 8 1 4 3 5 3 1 0 2 +P 4 0 3 4 1 3 3 1 3 1 1 1 8 3 3 3 3 2 0 0 +Q 3 0 4 5 0 2 4 0 4 3 3 4 3 8 5 4 3 2 2 1 +R 2 1 1 3 1 3 5 1 5 2 1 3 3 5 8 4 3 2 3 2 +S 4 2 3 4 2 3 3 2 3 2 2 5 3 4 4 8 5 2 3 3 +T 3 2 3 4 1 2 4 3 3 3 3 3 3 3 3 5 8 3 2 1 +V 3 1 1 2 3 2 2 5 2 5 4 1 2 2 2 2 3 8 2 3 +W 1 2 0 1 6 1 3 3 1 3 1 0 0 2 3 3 2 2 9 6 +Y 1 1 1 2 6 0 4 3 1 3 2 2 0 1 2 3 1 3 6 9 diff --git a/code/lib/Bio/Align/substitution_matrices/data/MDM78 b/code/lib/Bio/Align/substitution_matrices/data/MDM78 new file mode 100644 index 0000000..5d0b2ef --- /dev/null +++ b/code/lib/Bio/Align/substitution_matrices/data/MDM78 @@ -0,0 +1,27 @@ +# R.M. Schwartz and M.O. Dayhoff: +# "Matrices for Detecting Distant Relationships." +# Margaret O. Dayhoff: Atlas of Protein Sequence and Structure, +# Volume 5, Supplement 3, 1978, pages 353-358. +# The National Biomedical Research Foundation, 1979. +# Figure 85, page 354. 
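`select()` from the `Array` class reindexes a matrix onto a new alphabet, copying the scores for letters the two alphabets share (letters missing from the source matrix are skipped, leaving zero entries). An illustrative sketch:

```python
from Bio.Align.substitution_matrices import load

blosum62 = load("BLOSUM62")
mini = blosum62.select("ACGT")  # restrict to four of the amino acid codes
print(mini.alphabet)            # ACGT
print(mini["C", "C"])           # 9.0, unchanged from blosum62["C", "C"]
```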
+ A R N D C Q E G H I L K M F P S T W Y V +A 0.18 -0.15 0.02 0.03 -0.20 -0.04 0.03 0.13 -0.14 -0.05 -0.19 -0.12 -0.11 -0.35 0.11 0.11 0.12 -0.58 -0.35 0.02 +R -0.15 0.61 0.00 -0.13 -0.36 0.13 -0.11 -0.26 0.16 -0.20 -0.30 0.34 -0.04 -0.45 -0.02 -0.03 -0.09 0.22 -0.42 -0.25 +N 0.02 0.00 0.20 0.21 -0.36 0.08 0.14 0.03 0.16 -0.18 -0.29 0.10 -0.17 -0.35 -0.05 0.07 0.04 -0.42 -0.21 -0.17 +D 0.03 -0.13 0.21 0.39 -0.51 0.16 0.34 0.06 0.07 -0.24 -0.40 0.01 -0.26 -0.56 -0.10 0.03 -0.01 -0.68 -0.43 -0.21 +C -0.20 -0.36 -0.36 -0.51 1.19 -0.54 -0.53 -0.34 -0.34 -0.23 -0.60 -0.54 -0.52 -0.43 -0.28 0.00 -0.22 -0.78 0.03 -0.19 +Q -0.04 0.13 0.08 0.16 -0.54 0.40 0.25 -0.12 0.29 -0.20 -0.18 0.07 -0.10 -0.47 0.02 -0.05 -0.08 -0.48 -0.40 -0.19 +E 0.03 -0.11 0.14 0.34 -0.53 0.25 0.38 0.02 0.07 -0.20 -0.34 -0.01 -0.21 -0.54 -0.06 0.00 -0.04 -0.70 -0.43 -0.18 +G 0.13 -0.26 0.03 0.06 -0.34 -0.12 0.02 0.48 -0.21 -0.26 -0.41 -0.17 -0.28 -0.48 -0.05 0.11 0.00 -0.70 -0.52 -0.14 +H -0.14 0.16 0.16 0.07 -0.34 0.29 0.07 -0.21 0.65 -0.24 -0.21 0.00 -0.21 -0.18 -0.02 -0.08 -0.13 -0.28 -0.01 -0.22 +I -0.05 -0.20 -0.18 -0.24 -0.23 -0.20 -0.20 -0.26 -0.24 0.45 0.24 -0.19 0.22 0.10 -0.20 -0.14 0.01 -0.51 -0.09 0.37 +L -0.19 -0.30 -0.29 -0.40 -0.60 -0.18 -0.34 -0.41 -0.21 0.24 0.59 -0.29 0.37 0.18 -0.25 -0.28 -0.17 -0.18 -0.09 0.19 +K -0.12 0.34 0.10 0.01 -0.54 0.07 -0.01 -0.17 0.00 -0.19 -0.29 0.47 0.04 -0.53 -0.11 -0.02 0.00 -0.35 -0.44 -0.24 +M -0.11 -0.04 -0.17 -0.26 -0.52 -0.10 -0.21 -0.28 -0.21 0.22 0.37 0.04 0.64 0.02 -0.21 -0.16 -0.06 -0.42 -0.24 0.18 +F -0.35 -0.45 -0.35 -0.56 -0.43 -0.47 -0.54 -0.48 -0.18 0.10 0.18 -0.53 0.02 0.91 -0.46 -0.32 -0.31 0.04 0.70 -0.12 +P 0.11 -0.02 -0.05 -0.10 -0.28 0.02 -0.06 -0.05 -0.02 -0.20 -0.25 -0.11 -0.21 -0.46 0.59 0.09 0.03 -0.56 -0.49 -0.12 +S 0.11 -0.03 0.07 0.03 0.00 -0.05 0.00 0.11 -0.08 -0.14 -0.28 -0.02 -0.16 -0.32 0.09 0.16 0.13 -0.25 -0.28 -0.10 +T 0.12 -0.09 0.04 -0.01 -0.22 -0.08 -0.04 0.00 -0.13 0.01 -0.17 0.00 -0.06 -0.31 0.03 0.13 0.26 -0.52 -0.27 0.03 +W -0.58 0.22 -0.42 -0.68 -0.78 -0.48 -0.70 -0.70 -0.28 -0.51 -0.18 -0.35 -0.42 0.04 -0.56 -0.25 -0.52 1.73 -0.02 -0.62 +Y -0.35 -0.42 -0.21 -0.43 0.03 -0.40 -0.43 -0.52 -0.01 -0.09 -0.09 -0.44 -0.24 0.70 -0.49 -0.28 -0.27 -0.02 1.01 -0.25 +V 0.02 -0.25 -0.17 -0.21 -0.19 -0.19 -0.18 -0.14 -0.22 0.37 0.19 -0.24 0.18 -0.12 -0.12 -0.10 0.03 -0.62 -0.25 0.43 diff --git a/code/lib/Bio/Align/substitution_matrices/data/NUC.4.4 b/code/lib/Bio/Align/substitution_matrices/data/NUC.4.4 new file mode 100644 index 0000000..6fb12d2 --- /dev/null +++ b/code/lib/Bio/Align/substitution_matrices/data/NUC.4.4 @@ -0,0 +1,25 @@ +# +# This matrix was created by Todd Lowe 12/10/92 +# +# Uses ambiguous nucleotide codes, probabilities rounded to +# nearest integer +# +# Lowest score = -4, Highest score = 5 +# + A T G C S W R Y K M B V H D N +A 5 -4 -4 -4 -4 1 1 -4 -4 1 -4 -1 -1 -1 -2 +T -4 5 -4 -4 -4 1 -4 1 1 -4 -1 -4 -1 -1 -2 +G -4 -4 5 -4 1 -4 1 -4 1 -4 -1 -1 -4 -1 -2 +C -4 -4 -4 5 1 -4 -4 1 -4 1 -1 -1 -1 -4 -2 +S -4 -4 1 1 -1 -4 -2 -2 -2 -2 -1 -1 -3 -3 -1 +W 1 1 -4 -4 -4 -1 -2 -2 -2 -2 -3 -3 -1 -1 -1 +R 1 -4 1 -4 -2 -2 -1 -4 -2 -2 -3 -1 -3 -1 -1 +Y -4 1 -4 1 -2 -2 -4 -1 -2 -2 -1 -3 -1 -3 -1 +K -4 1 1 -4 -2 -2 -2 -2 -1 -4 -1 -3 -3 -1 -1 +M 1 -4 -4 1 -2 -2 -2 -2 -4 -1 -3 -1 -1 -3 -1 +B -4 -1 -1 -1 -1 -3 -3 -1 -1 -3 -1 -2 -2 -2 -1 +V -1 -4 -1 -1 -1 -3 -1 -3 -3 -1 -2 -1 -2 -2 -1 +H -1 -1 -4 -1 -3 -1 -3 -1 -3 -1 -2 -2 -1 -2 -1 +D -1 -1 -1 -4 -3 -1 -1 -3 -1 -3 -2 -2 -2 -1 -1 +N -2 -2 -2 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 + diff --git 
a/code/lib/Bio/Align/substitution_matrices/data/PAM250 b/code/lib/Bio/Align/substitution_matrices/data/PAM250 new file mode 100644 index 0000000..17e9e60 --- /dev/null +++ b/code/lib/Bio/Align/substitution_matrices/data/PAM250 @@ -0,0 +1,34 @@ +# +# This matrix was produced by "pam" Version 1.0.6 [28-Jul-93] +# +# PAM 250 substitution matrix, scale = ln(2)/3 = 0.231049 +# +# Expected score = -0.844, Entropy = 0.354 bits +# +# Lowest score = -8, Highest score = 17 +# + A R N D C Q E G H I L K M F P S T W Y V B Z X * +A 2 -2 0 0 -2 0 0 1 -1 -1 -2 -1 -1 -3 1 1 1 -6 -3 0 0 0 0 -8 +R -2 6 0 -1 -4 1 -1 -3 2 -2 -3 3 0 -4 0 0 -1 2 -4 -2 -1 0 -1 -8 +N 0 0 2 2 -4 1 1 0 2 -2 -3 1 -2 -3 0 1 0 -4 -2 -2 2 1 0 -8 +D 0 -1 2 4 -5 2 3 1 1 -2 -4 0 -3 -6 -1 0 0 -7 -4 -2 3 3 -1 -8 +C -2 -4 -4 -5 12 -5 -5 -3 -3 -2 -6 -5 -5 -4 -3 0 -2 -8 0 -2 -4 -5 -3 -8 +Q 0 1 1 2 -5 4 2 -1 3 -2 -2 1 -1 -5 0 -1 -1 -5 -4 -2 1 3 -1 -8 +E 0 -1 1 3 -5 2 4 0 1 -2 -3 0 -2 -5 -1 0 0 -7 -4 -2 3 3 -1 -8 +G 1 -3 0 1 -3 -1 0 5 -2 -3 -4 -2 -3 -5 0 1 0 -7 -5 -1 0 0 -1 -8 +H -1 2 2 1 -3 3 1 -2 6 -2 -2 0 -2 -2 0 -1 -1 -3 0 -2 1 2 -1 -8 +I -1 -2 -2 -2 -2 -2 -2 -3 -2 5 2 -2 2 1 -2 -1 0 -5 -1 4 -2 -2 -1 -8 +L -2 -3 -3 -4 -6 -2 -3 -4 -2 2 6 -3 4 2 -3 -3 -2 -2 -1 2 -3 -3 -1 -8 +K -1 3 1 0 -5 1 0 -2 0 -2 -3 5 0 -5 -1 0 0 -3 -4 -2 1 0 -1 -8 +M -1 0 -2 -3 -5 -1 -2 -3 -2 2 4 0 6 0 -2 -2 -1 -4 -2 2 -2 -2 -1 -8 +F -3 -4 -3 -6 -4 -5 -5 -5 -2 1 2 -5 0 9 -5 -3 -3 0 7 -1 -4 -5 -2 -8 +P 1 0 0 -1 -3 0 -1 0 0 -2 -3 -1 -2 -5 6 1 0 -6 -5 -1 -1 0 -1 -8 +S 1 0 1 0 0 -1 0 1 -1 -1 -3 0 -2 -3 1 2 1 -2 -3 -1 0 0 0 -8 +T 1 -1 0 0 -2 -1 0 0 -1 0 -2 0 -1 -3 0 1 3 -5 -3 0 0 -1 0 -8 +W -6 2 -4 -7 -8 -5 -7 -7 -3 -5 -2 -3 -4 0 -6 -2 -5 17 0 -6 -5 -6 -4 -8 +Y -3 -4 -2 -4 0 -4 -4 -5 0 -1 -1 -4 -2 7 -5 -3 -3 0 10 -2 -3 -4 -2 -8 +V 0 -2 -2 -2 -2 -2 -2 -1 -2 4 2 -2 2 -1 -1 -1 0 -6 -2 4 -2 -2 -1 -8 +B 0 -1 2 3 -4 1 3 0 1 -2 -3 1 -2 -4 -1 0 0 -5 -3 -2 3 2 -1 -8 +Z 0 0 1 3 -5 3 3 0 2 -2 -3 0 -2 -5 0 0 -1 -6 -4 -2 2 3 -1 -8 +X 0 -1 0 -1 -3 -1 -1 -1 -1 -1 -1 -1 -1 -2 -1 0 0 -4 -2 -1 -1 -1 -1 -8 +* -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 1 diff --git a/code/lib/Bio/Align/substitution_matrices/data/PAM30 b/code/lib/Bio/Align/substitution_matrices/data/PAM30 new file mode 100644 index 0000000..8a01c88 --- /dev/null +++ b/code/lib/Bio/Align/substitution_matrices/data/PAM30 @@ -0,0 +1,34 @@ +# +# This matrix was produced by "pam" Version 1.0.6 [28-Jul-93] +# +# PAM 30 substitution matrix, scale = ln(2)/2 = 0.346574 +# +# Expected score = -5.06, Entropy = 2.57 bits +# +# Lowest score = -17, Highest score = 13 +# + A R N D C Q E G H I L K M F P S T W Y V B Z X * +A 6 -7 -4 -3 -6 -4 -2 -2 -7 -5 -6 -7 -5 -8 -2 0 -1 -13 -8 -2 -3 -3 -3 -17 +R -7 8 -6 -10 -8 -2 -9 -9 -2 -5 -8 0 -4 -9 -4 -3 -6 -2 -10 -8 -7 -4 -6 -17 +N -4 -6 8 2 -11 -3 -2 -3 0 -5 -7 -1 -9 -9 -6 0 -2 -8 -4 -8 6 -3 -3 -17 +D -3 -10 2 8 -14 -2 2 -3 -4 -7 -12 -4 -11 -15 -8 -4 -5 -15 -11 -8 6 1 -5 -17 +C -6 -8 -11 -14 10 -14 -14 -9 -7 -6 -15 -14 -13 -13 -8 -3 -8 -15 -4 -6 -12 -14 -9 -17 +Q -4 -2 -3 -2 -14 8 1 -7 1 -8 -5 -3 -4 -13 -3 -5 -5 -13 -12 -7 -3 6 -5 -17 +E -2 -9 -2 2 -14 1 8 -4 -5 -5 -9 -4 -7 -14 -5 -4 -6 -17 -8 -6 1 6 -5 -17 +G -2 -9 -3 -3 -9 -7 -4 6 -9 -11 -10 -7 -8 -9 -6 -2 -6 -15 -14 -5 -3 -5 -5 -17 +H -7 -2 0 -4 -7 1 -5 -9 9 -9 -6 -6 -10 -6 -4 -6 -7 -7 -3 -6 -1 -1 -5 -17 +I -5 -5 -5 -7 -6 -8 -5 -11 -9 8 -1 -6 -1 -2 -8 -7 -2 -14 -6 2 -6 -6 -5 -17 +L -6 -8 -7 -12 -15 -5 -9 -10 -6 -1 7 -8 1 -3 -7 -8 -7 -6 -7 -2 -9 -7 -6 -17 +K -7 0 -1 -4 -14 -3 -4 -7 -6 -6 -8 7 -2 -14 -6 -4 -3 -12 -9 -9 -2 -4 -5 
-17 +M -5 -4 -9 -11 -13 -4 -7 -8 -10 -1 1 -2 11 -4 -8 -5 -4 -13 -11 -1 -10 -5 -5 -17 +F -8 -9 -9 -15 -13 -13 -14 -9 -6 -2 -3 -14 -4 9 -10 -6 -9 -4 2 -8 -10 -13 -8 -17 +P -2 -4 -6 -8 -8 -3 -5 -6 -4 -8 -7 -6 -8 -10 8 -2 -4 -14 -13 -6 -7 -4 -5 -17 +S 0 -3 0 -4 -3 -5 -4 -2 -6 -7 -8 -4 -5 -6 -2 6 0 -5 -7 -6 -1 -5 -3 -17 +T -1 -6 -2 -5 -8 -5 -6 -6 -7 -2 -7 -3 -4 -9 -4 0 7 -13 -6 -3 -3 -6 -4 -17 +W -13 -2 -8 -15 -15 -13 -17 -15 -7 -14 -6 -12 -13 -4 -14 -5 -13 13 -5 -15 -10 -14 -11 -17 +Y -8 -10 -4 -11 -4 -12 -8 -14 -3 -6 -7 -9 -11 2 -13 -7 -6 -5 10 -7 -6 -9 -7 -17 +V -2 -8 -8 -8 -6 -7 -6 -5 -6 2 -2 -9 -1 -8 -6 -6 -3 -15 -7 7 -8 -6 -5 -17 +B -3 -7 6 6 -12 -3 1 -3 -1 -6 -9 -2 -10 -10 -7 -1 -3 -10 -6 -8 6 0 -5 -17 +Z -3 -4 -3 1 -14 6 6 -5 -1 -6 -7 -4 -5 -13 -4 -5 -6 -14 -9 -6 0 6 -5 -17 +X -3 -6 -3 -5 -9 -5 -5 -5 -5 -5 -6 -5 -5 -8 -5 -3 -4 -11 -7 -5 -5 -5 -5 -17 +* -17 -17 -17 -17 -17 -17 -17 -17 -17 -17 -17 -17 -17 -17 -17 -17 -17 -17 -17 -17 -17 -17 -17 1 diff --git a/code/lib/Bio/Align/substitution_matrices/data/PAM70 b/code/lib/Bio/Align/substitution_matrices/data/PAM70 new file mode 100644 index 0000000..b20cdf0 --- /dev/null +++ b/code/lib/Bio/Align/substitution_matrices/data/PAM70 @@ -0,0 +1,34 @@ +# +# This matrix was produced by "pam" Version 1.0.6 [28-Jul-93] +# +# PAM 70 substitution matrix, scale = ln(2)/2 = 0.346574 +# +# Expected score = -2.77, Entropy = 1.60 bits +# +# Lowest score = -11, Highest score = 13 +# + A R N D C Q E G H I L K M F P S T W Y V B Z X * +A 5 -4 -2 -1 -4 -2 -1 0 -4 -2 -4 -4 -3 -6 0 1 1 -9 -5 -1 -1 -1 -2 -11 +R -4 8 -3 -6 -5 0 -5 -6 0 -3 -6 2 -2 -7 -2 -1 -4 0 -7 -5 -4 -2 -3 -11 +N -2 -3 6 3 -7 -1 0 -1 1 -3 -5 0 -5 -6 -3 1 0 -6 -3 -5 5 -1 -2 -11 +D -1 -6 3 6 -9 0 3 -1 -1 -5 -8 -2 -7 -10 -4 -1 -2 -10 -7 -5 5 2 -3 -11 +C -4 -5 -7 -9 9 -9 -9 -6 -5 -4 -10 -9 -9 -8 -5 -1 -5 -11 -2 -4 -8 -9 -6 -11 +Q -2 0 -1 0 -9 7 2 -4 2 -5 -3 -1 -2 -9 -1 -3 -3 -8 -8 -4 -1 5 -2 -11 +E -1 -5 0 3 -9 2 6 -2 -2 -4 -6 -2 -4 -9 -3 -2 -3 -11 -6 -4 2 5 -3 -11 +G 0 -6 -1 -1 -6 -4 -2 6 -6 -6 -7 -5 -6 -7 -3 0 -3 -10 -9 -3 -1 -3 -3 -11 +H -4 0 1 -1 -5 2 -2 -6 8 -6 -4 -3 -6 -4 -2 -3 -4 -5 -1 -4 0 1 -3 -11 +I -2 -3 -3 -5 -4 -5 -4 -6 -6 7 1 -4 1 0 -5 -4 -1 -9 -4 3 -4 -4 -3 -11 +L -4 -6 -5 -8 -10 -3 -6 -7 -4 1 6 -5 2 -1 -5 -6 -4 -4 -4 0 -6 -4 -4 -11 +K -4 2 0 -2 -9 -1 -2 -5 -3 -4 -5 6 0 -9 -4 -2 -1 -7 -7 -6 -1 -2 -3 -11 +M -3 -2 -5 -7 -9 -2 -4 -6 -6 1 2 0 10 -2 -5 -3 -2 -8 -7 0 -6 -3 -3 -11 +F -6 -7 -6 -10 -8 -9 -9 -7 -4 0 -1 -9 -2 8 -7 -4 -6 -2 4 -5 -7 -9 -5 -11 +P 0 -2 -3 -4 -5 -1 -3 -3 -2 -5 -5 -4 -5 -7 7 0 -2 -9 -9 -3 -4 -2 -3 -11 +S 1 -1 1 -1 -1 -3 -2 0 -3 -4 -6 -2 -3 -4 0 5 2 -3 -5 -3 0 -2 -1 -11 +T 1 -4 0 -2 -5 -3 -3 -3 -4 -1 -4 -1 -2 -6 -2 2 6 -8 -4 -1 -1 -3 -2 -11 +W -9 0 -6 -10 -11 -8 -11 -10 -5 -9 -4 -7 -8 -2 -9 -3 -8 13 -3 -10 -7 -10 -7 -11 +Y -5 -7 -3 -7 -2 -8 -6 -9 -1 -4 -4 -7 -7 4 -9 -5 -4 -3 9 -5 -4 -7 -5 -11 +V -1 -5 -5 -5 -4 -4 -4 -3 -4 3 0 -6 0 -5 -3 -3 -1 -10 -5 6 -5 -4 -2 -11 +B -1 -4 5 5 -8 -1 2 -1 0 -4 -6 -1 -6 -7 -4 0 -1 -7 -4 -5 5 1 -2 -11 +Z -1 -2 -1 2 -9 5 5 -3 1 -4 -4 -2 -3 -9 -2 -2 -3 -10 -7 -4 1 5 -3 -11 +X -2 -3 -2 -3 -6 -2 -3 -3 -3 -3 -4 -3 -3 -5 -3 -1 -2 -7 -5 -2 -2 -3 -3 -11 +* -11 -11 -11 -11 -11 -11 -11 -11 -11 -11 -11 -11 -11 -11 -11 -11 -11 -11 -11 -11 -11 -11 -11 1 diff --git a/code/lib/Bio/Align/substitution_matrices/data/RAO b/code/lib/Bio/Align/substitution_matrices/data/RAO new file mode 100644 index 0000000..f3ef1c0 --- /dev/null +++ b/code/lib/Bio/Align/substitution_matrices/data/RAO @@ -0,0 +1,27 @@ +# J.K. 
Mohana Rao: +# "New scoring matrix for amino acid residue exchanges based on residue +# characteristic physical parameters." +# International Journal of Peptide and Protein Research: 29(2): 276-281 (1987). +# Figure 1, lower triangle. +# PMID 3570667 + A C D E F G H I K L M N P Q R S T V W Y +A 16 11 9 10 10 8 11 9 10 11 11 9 6 11 8 10 10 9 11 9 +C 11 16 8 9 10 8 10 8 9 11 10 9 7 10 8 10 10 8 11 10 +D 9 8 16 11 4 9 9 3 11 6 5 11 8 11 10 10 9 3 6 7 +E 10 9 11 16 6 6 11 4 11 7 8 10 5 11 9 9 8 4 7 6 +F 10 10 4 6 16 7 9 12 6 11 10 6 4 7 5 8 10 11 11 10 +G 8 8 9 6 7 16 7 6 7 6 4 10 11 8 7 11 10 6 8 10 +H 11 10 9 11 9 7 16 8 11 10 10 10 5 11 10 10 10 9 10 9 +I 9 8 3 4 12 6 8 16 4 10 9 5 3 6 4 8 10 12 11 10 +K 10 9 11 11 6 7 11 4 16 7 8 11 6 12 11 10 9 5 7 7 +L 11 11 6 7 11 6 10 10 7 16 11 7 4 9 6 8 9 10 11 9 +M 11 10 5 8 10 4 10 9 8 11 16 6 2 9 6 7 8 9 10 8 +N 9 9 11 10 6 10 10 5 11 7 6 16 9 11 10 11 10 5 8 8 +P 6 7 8 5 4 11 5 3 6 4 2 9 16 7 6 10 8 3 6 8 +Q 11 10 11 11 7 8 11 6 12 9 9 11 7 16 10 10 10 6 9 8 +R 8 8 10 9 5 7 10 4 11 6 6 10 6 10 16 9 9 5 7 7 +S 10 10 10 9 8 11 10 8 10 8 7 11 10 10 9 16 11 8 10 11 +T 10 10 9 8 10 10 10 10 9 9 8 10 8 10 9 11 16 10 11 11 +V 9 8 3 4 11 6 9 12 5 10 9 5 3 6 5 8 10 16 11 10 +W 11 11 6 7 11 8 10 11 7 11 10 8 6 9 7 10 11 11 16 11 +Y 9 10 7 6 10 10 9 10 7 9 8 8 8 8 7 11 11 10 11 16 diff --git a/code/lib/Bio/Align/substitution_matrices/data/RISLER b/code/lib/Bio/Align/substitution_matrices/data/RISLER new file mode 100644 index 0000000..438b601 --- /dev/null +++ b/code/lib/Bio/Align/substitution_matrices/data/RISLER @@ -0,0 +1,27 @@ +# J.L. Risler, M.O. Delorme, H. Delacroix, A. Henaut: +# "Amino acid substitutions in structurally related proteins. A pattern +# recognition approach. Determination of a new and efficient scoring matrix." +# Journal of Molecular Biology 204(4): 1019-1029 (1988). +# Figure 5. 
+# PMID 3221397 + A C D E F G H I K L M N P Q R S T V W Y +A 2.2 -1.5 0.2 1.7 0.6 0.6 -0.6 1.7 1.4 1.3 1.0 1.3 -0.2 1.8 1.5 2.0 1.9 2.0 -0.9 0.2 +C -1.5 2.2 -1.7 -1.5 -1.6 -1.7 -1.8 -1.6 -1.6 -1.5 -1.6 -1.6 -1.8 -1.4 -1.5 -1.3 -1.4 -1.4 -1.8 -1.1 +D 0.2 -1.7 2.2 1.0 -0.3 -0.4 -1.3 0.0 0.1 -0.2 -0.5 0.8 -1.2 0.6 -0.1 0.7 0.0 0.0 -1.4 -0.4 +E 1.7 -1.5 1.0 2.2 0.6 0.3 -0.6 1.5 1.4 0.9 0.6 1.4 -0.1 2.1 1.9 1.8 1.6 1.6 -1.0 0.2 +F 0.6 -1.6 -0.3 0.6 2.2 -0.4 -1.1 1.0 0.1 1.0 -0.2 0.4 -1.1 0.7 0.4 0.5 0.3 0.8 -0.9 2.0 +G 0.6 -1.7 -0.4 0.3 -0.4 2.2 -1.2 0.0 -0.1 -0.2 -0.4 0.2 -1.2 0.2 0.1 0.7 0.2 0.1 -1.3 -0.2 +H -0.6 -1.8 -1.3 -0.6 -1.1 -1.2 2.2 -0.8 -1.0 -0.9 -1.2 -0.3 -1.6 -0.5 -0.4 -0.4 -0.9 -0.7 -1.7 -0.8 +I 1.7 -1.6 0.0 1.5 1.0 0.0 -0.8 2.2 1.0 2.1 0.9 0.9 -0.6 1.4 1.4 1.6 1.6 2.2 -0.7 0.4 +K 1.4 -1.6 0.1 1.4 0.1 -0.1 -1.0 1.0 2.2 0.7 0.4 1.0 -0.7 1.7 2.1 1.4 1.2 1.2 -1.1 0.5 +L 1.3 -1.5 -0.2 0.9 1.0 -0.2 -0.9 2.1 0.7 2.2 1.8 0.8 -0.8 1.1 1.2 1.3 1.2 2.0 -0.8 0.5 +M 1.0 -1.6 -0.5 0.6 -0.2 -0.4 -1.2 0.9 0.4 1.8 2.2 0.0 -1.2 1.2 1.1 0.6 0.8 0.8 -1.3 -0.2 +N 1.3 -1.6 0.8 1.4 0.4 0.2 -0.3 0.9 1.0 0.8 0.0 2.2 -1.0 1.6 1.2 1.9 1.1 1.1 -1.1 -0.1 +P -0.2 -1.8 -1.2 -0.1 -1.1 -1.2 -1.6 -0.6 -0.7 -0.8 -1.2 -1.0 2.2 -0.6 -0.3 -0.3 -0.5 -0.6 -1.6 -1.2 +Q 1.8 -1.4 0.6 2.1 0.7 0.2 -0.5 1.4 1.7 1.1 1.2 1.6 -0.6 2.2 2.0 1.8 1.7 1.5 -1.0 0.5 +R 1.5 -1.5 -0.1 1.9 0.4 0.1 -0.4 1.4 2.1 1.2 1.1 1.2 -0.3 2.0 2.2 2.0 1.9 1.5 -0.8 0.8 +S 2.0 -1.3 0.7 1.8 0.5 0.7 -0.4 1.6 1.4 1.3 0.6 1.9 -0.3 1.8 2.0 2.2 2.1 1.8 -0.8 0.4 +T 1.9 -1.4 0.0 1.6 0.3 0.2 -0.9 1.6 1.2 1.2 0.8 1.1 -0.5 1.7 1.9 2.1 2.2 1.6 -1.0 0.3 +V 2.0 -1.4 0.0 1.6 0.8 0.1 -0.7 2.2 1.2 2.0 0.8 1.1 -0.6 1.5 1.5 1.8 1.6 2.2 -0.7 0.3 +W -0.9 -1.8 -1.4 -1.0 -0.9 -1.3 -1.7 -0.7 -1.1 -0.8 -1.3 -1.1 -1.6 -1.0 -0.8 -0.8 -1.0 -0.7 2.2 -0.6 +Y 0.2 -1.1 -0.4 0.2 2.0 -0.2 -0.8 0.4 0.5 0.5 -0.2 -0.1 -1.2 0.5 0.8 0.4 0.3 0.3 -0.6 2.2 diff --git a/code/lib/Bio/Align/substitution_matrices/data/SCHNEIDER b/code/lib/Bio/Align/substitution_matrices/data/SCHNEIDER new file mode 100644 index 0000000..0384fa9 --- /dev/null +++ b/code/lib/Bio/Align/substitution_matrices/data/SCHNEIDER @@ -0,0 +1,70 @@ +# Adrian Schneider, Gina M. Cannarozzi, and Gaston H. Gonnet: +# "Empirical codon substitution matrix." +# BMC Bioinformatics 6:134 (2005). +# Additional File 3. 
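The SCHNEIDER matrix below is indexed by three-letter codons rather than single letters; because its labels are longer than one character, `read()` keeps the alphabet as a tuple of strings instead of joining it into one string (the `len(key) > 1` test above). A small sketch under the same import assumptions as before:

```python
from Bio.Align.substitution_matrices import load

codon = load("SCHNEIDER")
print(type(codon.alphabet))  # <class 'tuple'> rather than str
print(codon.alphabet[:2])    # ('AAA', 'AAC')
print(codon["AAA", "AAG"])   # 9.7
```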
+# PMID 15927081 + AAA AAC AAG AAT ACA ACC ACG ACT AGA AGC AGG AGT ATA ATC ATG ATT CAA CAC CAG CAT CCA CCC CCG CCT CGA CGC CGG CGT CTA CTC CTG CTT GAA GAC GAG GAT GCA GCC GCG GCT GGA GGC GGG GGT GTA GTC GTG GTT TAA TAC TAG TAT TCA TCC TCG TCT TGA TGC TGG TGT TTA TTC TTG TTT +AAA 11.6 -2.7 9.7 -1.7 -2.7 -6.4 -3.9 -5.6 5.1 -5.0 3.6 -4.2 -6.3 -13.0 -7.1 -11.5 0.4 -6.0 -1.9 -5.3 -8.5 -11.2 -8.9 -10.8 2.1 0.0 1.4 0.2 -10.2 -13.5 -13.0 -12.5 -2.6 -8.5 -5.0 -8.1 -6.3 -9.9 -7.5 -9.0 -7.1 -10.2 -8.2 -9.2 -8.2 -12.5 -11.1 -11.4 -50.0 -14.8 -50.0 -13.8 -7.3 -10.1 -8.4 -9.1 -50.0 -13.0 -13.5 -12.4 -10.7 -18.1 -11.8 -17.2 +AAC -2.7 13.0 -3.3 10.9 -3.5 -0.4 -3.3 -1.8 -5.4 4.6 -5.5 3.0 -10.2 -7.9 -9.9 -9.6 -5.0 0.5 -5.5 -1.0 -10.3 -8.1 -9.4 -9.6 -8.1 -5.0 -7.3 -6.3 -13.4 -11.3 -14.4 -12.9 -6.3 0.8 -6.4 -1.1 -7.4 -5.0 -6.2 -6.5 -5.6 -1.6 -4.7 -3.0 -10.8 -8.7 -11.9 -10.0 -50.0 -6.2 -50.0 -7.5 -6.3 -4.3 -6.2 -5.4 -50.0 -7.0 -16.3 -8.2 -13.2 -12.3 -13.1 -13.3 +AAG 9.7 -3.3 11.6 -2.8 -4.5 -6.7 -3.1 -6.9 3.3 -5.5 4.8 -5.1 -8.9 -13.2 -5.7 -12.6 -1.5 -6.1 -0.6 -6.1 -10.0 -11.8 -8.6 -11.9 1.2 0.5 2.2 0.1 -11.8 -14.0 -11.9 -13.4 -4.9 -9.1 -3.4 -8.9 -8.0 -10.0 -7.2 -10.0 -9.1 -10.2 -7.1 -9.9 -10.1 -13.0 -10.6 -12.7 -50.0 -14.9 -50.0 -14.4 -8.9 -10.8 -8.9 -10.4 -50.0 -13.1 -11.8 -13.0 -12.4 -19.4 -11.5 -17.8 +AAT -1.7 10.9 -2.8 12.9 -2.7 -2.2 -2.8 0.2 -4.8 2.9 -5.2 5.2 -9.1 -9.5 -9.0 -7.0 -4.0 -1.0 -5.0 1.0 -9.0 -8.8 -8.9 -7.7 -7.0 -6.8 -7.1 -4.3 -12.4 -12.9 -13.9 -10.0 -4.9 -0.7 -5.6 1.4 -6.0 -6.5 -6.2 -4.8 -5.1 -3.2 -5.0 -1.2 -9.8 -9.9 -11.3 -8.2 -50.0 -7.6 -50.0 -5.1 -5.5 -5.8 -5.9 -4.2 -50.0 -8.3 -15.5 -5.9 -11.6 -14.2 -12.0 -11.6 +ACA -2.7 -3.5 -4.5 -2.7 11.7 9.0 10.6 9.6 -3.2 -0.7 -5.1 -0.4 0.7 -4.6 -0.4 -3.6 -4.8 -8.9 -6.5 -7.8 -1.4 -4.1 -2.9 -3.3 -8.0 -9.1 -8.0 -8.2 -6.0 -9.3 -7.7 -8.5 -6.4 -10.1 -7.5 -8.6 2.8 -0.7 0.9 0.1 -5.9 -7.3 -6.5 -6.5 -0.4 -3.9 -2.6 -3.1 -50.0 -14.6 -50.0 -12.6 2.9 0.1 1.8 1.0 -50.0 -9.2 -13.6 -7.7 -4.8 -12.2 -6.2 -11.0 +ACC -6.4 -0.4 -6.7 -2.2 9.0 12.3 9.8 9.6 -7.1 2.4 -7.2 0.3 -2.9 -1.2 -3.0 -3.3 -7.6 -6.7 -8.1 -7.9 -4.9 -1.9 -4.5 -3.8 -9.5 -7.8 -9.3 -8.9 -9.4 -7.1 -9.0 -9.0 -9.4 -7.5 -9.3 -9.2 -0.7 2.6 0.2 -0.1 -8.0 -4.6 -7.7 -6.1 -3.6 -1.0 -4.1 -3.1 -50.0 -11.6 -50.0 -11.6 0.2 2.3 0.4 0.5 -50.0 -6.5 -15.3 -7.8 -8.4 -9.5 -8.3 -11.1 +ACG -3.9 -3.3 -3.1 -2.8 10.6 9.8 12.2 9.8 -4.9 0.1 -3.6 -0.2 -1.7 -4.1 0.9 -3.5 -5.9 -8.9 -5.6 -8.5 -3.9 -3.9 -2.2 -4.5 -7.3 -7.5 -6.4 -8.5 -7.1 -8.2 -6.7 -7.8 -7.9 -9.1 -7.1 -9.4 0.9 -0.2 2.5 -0.3 -6.9 -6.5 -5.5 -6.8 -1.7 -3.4 -1.7 -2.8 -50.0 -13.0 -50.0 -11.5 1.5 0.7 2.7 0.6 -50.0 -8.9 -12.1 -7.4 -6.1 -11.5 -5.4 -10.4 +ACT -5.6 -1.8 -6.9 0.2 9.6 9.6 9.8 11.6 -6.6 0.9 -7.2 2.5 -2.3 -3.4 -2.3 -0.6 -6.5 -8.2 -8.0 -6.3 -3.7 -3.5 -3.9 -1.6 -9.9 -8.9 -9.6 -7.0 -8.6 -8.9 -8.8 -6.8 -8.6 -8.2 -8.8 -6.8 0.3 0.1 0.2 2.4 -7.4 -6.2 -6.9 -4.4 -2.9 -2.9 -3.5 -0.9 -50.0 -12.2 -50.0 -10.1 1.2 0.6 1.2 2.4 -50.0 -7.6 -16.1 -5.8 -7.2 -10.8 -7.4 -9.0 +AGA 5.1 -5.4 3.3 -4.8 -3.2 -7.1 -4.9 -6.6 13.3 -2.8 11.2 -1.9 -5.7 -12.5 -7.2 -11.6 -0.7 -4.8 -3.1 -4.3 -10.0 -11.6 -8.8 -11.7 10.5 7.7 9.1 8.5 -9.8 -12.7 -11.7 -11.8 -6.3 -11.4 -8.8 -11.0 -7.5 -10.3 -8.3 -9.9 -1.9 -6.7 -4.1 -6.4 -7.9 -12.5 -11.4 -11.7 -50.0 -14.0 -50.0 -13.2 -8.5 -10.4 -9.1 -9.6 -50.0 -9.8 -7.7 -8.8 -10.3 -17.9 -11.8 -16.0 +AGC -5.0 4.6 -5.5 2.9 -0.7 2.4 0.1 0.9 -2.8 12.8 -2.6 11.0 -8.4 -6.3 -8.3 -7.7 -6.1 -2.9 -6.5 -4.2 -8.5 -5.9 -7.5 -7.7 -5.5 -2.4 -5.5 -4.1 -12.9 -10.3 -12.7 -11.3 -7.4 -3.1 -7.4 -4.9 -4.4 -1.8 -3.5 -3.5 -1.6 3.0 -0.8 0.9 -8.4 -6.3 -9.6 -7.3 -50.0 -9.0 -50.0 -9.7 -2.3 -0.2 -1.2 -1.5 
-50.0 -0.7 -13.3 -2.0 -11.8 -11.9 -12.3 -12.7 +AGG 3.6 -5.5 4.8 -5.2 -5.1 -7.2 -3.6 -7.2 11.2 -2.6 13.4 -2.1 -7.7 -12.5 -5.3 -11.9 -2.2 -4.4 -1.9 -4.8 -10.2 -11.5 -8.1 -12.0 9.3 8.2 10.0 8.0 -10.8 -11.9 -10.7 -12.4 -8.2 -11.8 -6.9 -11.5 -8.4 -9.9 -7.1 -10.6 -4.7 -6.8 -1.2 -7.0 -9.8 -11.6 -9.6 -12.1 -50.0 -14.1 -50.0 -12.7 -10.0 -10.8 -8.8 -10.5 -50.0 -9.2 -4.2 -9.3 -11.0 -18.2 -11.1 -16.1 +AGT -4.2 3.0 -5.1 5.2 -0.4 0.3 -0.2 2.5 -1.9 11.0 -2.1 13.2 -7.6 -8.5 -8.0 -5.3 -5.6 -4.5 -6.8 -2.4 -7.8 -7.9 -7.9 -6.2 -5.7 -5.1 -5.4 -2.1 -12.4 -12.3 -13.1 -10.1 -6.6 -4.4 -6.9 -2.6 -3.6 -3.8 -3.7 -2.0 -1.4 0.8 -0.9 3.0 -8.2 -8.1 -9.2 -5.8 -50.0 -10.1 -50.0 -7.4 -1.7 -2.0 -1.5 -0.5 -50.0 -2.1 -12.6 -0.4 -11.6 -13.9 -11.4 -11.1 +ATA -6.3 -10.2 -8.9 -9.1 0.7 -2.9 -1.7 -2.3 -5.7 -8.4 -7.7 -7.6 13.2 9.6 3.5 9.7 -8.7 -12.4 -10.7 -10.7 -7.8 -10.2 -8.8 -9.6 -9.5 -11.9 -10.9 -10.5 2.3 -0.4 -0.2 -0.3 -9.7 -15.5 -11.4 -13.7 -3.0 -6.4 -4.1 -5.4 -9.6 -11.9 -10.5 -11.8 6.2 3.3 3.7 3.6 -50.0 -13.6 -50.0 -11.9 -5.6 -8.8 -7.2 -8.9 -50.0 -12.4 -14.1 -11.6 2.8 -6.4 0.5 -5.2 +ATC -13.0 -7.9 -13.2 -9.5 -4.6 -1.2 -4.1 -3.4 -12.5 -6.3 -12.5 -8.5 9.6 12.7 0.2 10.5 -12.4 -11.6 -13.2 -12.0 -12.4 -10.1 -11.6 -12.0 -15.1 -13.1 -14.3 -13.4 -1.4 1.4 -1.6 -0.5 -14.9 -14.3 -15.2 -16.8 -7.7 -4.7 -6.3 -6.6 -14.6 -10.8 -12.8 -13.0 2.3 6.0 2.5 3.6 -50.0 -11.2 -50.0 -11.9 -10.5 -9.2 -10.5 -10.5 -50.0 -10.3 -16.2 -11.3 -1.7 -3.7 -2.5 -5.4 +ATG -7.1 -9.9 -5.7 -9.0 -0.4 -3.0 0.9 -2.3 -7.2 -8.3 -5.3 -8.0 3.5 0.2 14.3 1.0 -7.3 -11.2 -6.9 -9.7 -8.5 -10.2 -7.7 -9.8 -10.0 -10.4 -8.4 -10.2 1.0 -0.6 1.7 -0.5 -11.1 -14.8 -9.7 -14.5 -3.7 -5.8 -3.1 -5.6 -10.4 -11.5 -8.2 -11.1 0.6 -1.6 1.6 -1.1 -50.0 -12.4 -50.0 -11.5 -5.5 -8.0 -4.9 -7.2 -50.0 -12.4 -10.3 -11.6 0.7 -6.8 2.4 -6.0 +ATT -11.5 -9.6 -12.6 -7.0 -3.6 -3.3 -3.5 -0.6 -11.6 -7.7 -11.9 -5.3 9.7 10.5 1.0 12.6 -12.0 -12.2 -12.5 -9.7 -11.1 -11.2 -11.9 -9.4 -12.9 -14.3 -14.0 -11.9 -1.0 -0.4 -1.6 1.2 -13.9 -15.4 -13.7 -12.6 -6.2 -6.7 -6.3 -4.4 -13.0 -12.2 -13.0 -10.4 2.9 3.8 2.5 5.8 -50.0 -12.5 -50.0 -10.2 -9.6 -9.7 -9.6 -8.3 -50.0 -11.6 -15.4 -9.4 -1.1 -5.4 -1.6 -3.3 +CAA 0.4 -5.0 -1.5 -4.0 -4.8 -7.6 -5.9 -6.5 -0.7 -6.1 -2.2 -5.6 -8.7 -12.4 -7.3 -12.0 12.8 2.3 10.2 3.0 0.0 -3.4 -0.7 -3.2 2.5 -0.8 0.9 0.2 -3.0 -7.2 -5.7 -6.1 -0.2 -6.6 -1.8 -6.0 -5.3 -8.2 -5.7 -7.6 -6.7 -9.6 -7.5 -9.3 -7.1 -10.5 -9.6 -9.4 -50.0 -8.1 -50.0 -7.2 -4.3 -6.8 -5.7 -6.3 -50.0 -9.5 -9.6 -8.9 -6.2 -12.8 -6.8 -11.8 +CAC -6.0 0.5 -6.1 -1.0 -8.9 -6.7 -8.9 -8.2 -4.8 -2.9 -4.4 -4.5 -12.4 -11.6 -11.2 -12.2 2.3 14.6 1.9 12.9 -5.5 -2.6 -4.1 -4.7 -1.2 3.0 -1.1 1.6 -8.0 -4.7 -8.7 -6.0 -8.2 -4.7 -7.8 -6.7 -10.0 -8.3 -8.9 -9.9 -10.8 -7.1 -10.0 -9.2 -11.6 -10.1 -12.3 -11.9 -50.0 2.2 -50.0 0.9 -7.9 -5.9 -7.4 -6.9 -50.0 -4.6 -11.6 -5.6 -9.7 -5.9 -9.9 -7.3 +CAG -1.9 -5.5 -0.6 -5.0 -6.5 -8.1 -5.6 -8.0 -3.1 -6.5 -1.9 -6.8 -10.7 -13.2 -6.9 -12.5 10.2 1.9 11.9 2.1 -2.3 -4.1 -0.5 -4.6 -0.1 -0.9 2.4 -0.8 -5.1 -6.9 -4.8 -6.7 -2.3 -7.1 -0.8 -7.2 -7.2 -8.0 -5.6 -8.5 -9.4 -9.7 -7.2 -9.8 -9.2 -11.1 -9.5 -10.7 -50.0 -8.7 -50.0 -8.6 -6.1 -7.5 -5.5 -7.7 -50.0 -10.4 -7.7 -10.0 -7.3 -13.6 -6.4 -13.3 +CAT -5.3 -1.0 -6.1 1.0 -7.8 -7.9 -8.5 -6.3 -4.3 -4.2 -4.8 -2.4 -10.7 -12.0 -9.7 -9.7 3.0 12.9 2.1 14.7 -4.9 -3.8 -4.0 -2.4 -1.0 0.8 -1.3 3.4 -7.6 -5.7 -8.1 -3.5 -6.8 -5.8 -7.2 -4.2 -8.9 -10.2 -8.4 -8.2 -9.9 -8.4 -9.7 -6.4 -11.1 -11.1 -11.8 -10.0 -50.0 0.4 -50.0 2.6 -7.2 -7.0 -7.4 -5.1 -50.0 -5.7 -9.8 -3.2 -8.4 -7.4 -8.9 -5.9 +CCA -8.5 -10.3 -10.0 -9.0 -1.4 -4.9 -3.9 -3.7 -10.0 -8.5 -10.2 -7.8 -7.8 -12.4 -8.5 -11.1 0.0 -5.5 -2.3 -4.9 12.6 10.0 11.1 10.5 -6.0 -8.3 
-6.9 -7.5 -2.2 -7.4 -5.3 -5.9 -8.9 -12.5 -9.4 -11.6 -0.7 -3.9 -2.3 -3.0 -9.1 -9.8 -9.2 -9.5 -5.6 -9.0 -8.0 -8.5 -50.0 -15.3 -50.0 -14.1 2.3 -1.4 0.3 -0.5 -50.0 -13.6 -14.3 -11.5 -5.3 -13.7 -6.1 -12.7 +CCC -11.2 -8.1 -11.8 -8.8 -4.1 -1.9 -3.9 -3.5 -11.6 -5.9 -11.5 -7.9 -10.2 -10.1 -10.2 -11.2 -3.4 -2.6 -4.1 -3.8 10.0 13.1 10.7 10.6 -8.1 -5.2 -8.0 -6.7 -6.1 -3.7 -7.5 -5.6 -11.1 -10.3 -10.6 -11.9 -3.2 -1.0 -2.2 -2.8 -10.5 -8.1 -9.4 -9.5 -8.3 -6.9 -9.4 -8.8 -50.0 -11.3 -50.0 -12.8 -0.6 2.2 -0.3 0.1 -50.0 -10.1 -17.5 -11.1 -8.6 -9.9 -8.6 -11.9 +CCG -8.9 -9.4 -8.6 -8.9 -2.9 -4.5 -2.2 -3.9 -8.8 -7.5 -8.1 -7.9 -8.8 -11.6 -7.7 -11.9 -0.7 -4.1 -0.5 -4.0 11.1 10.7 13.2 10.4 -5.7 -5.8 -3.5 -6.1 -3.6 -6.5 -3.3 -5.5 -9.4 -11.0 -8.7 -11.2 -1.8 -2.7 0.7 -3.0 -9.1 -8.2 -7.3 -9.4 -7.2 -8.2 -7.2 -8.2 -50.0 -13.2 -50.0 -13.3 0.5 -0.9 1.8 -0.6 -50.0 -11.4 -11.1 -10.5 -6.1 -12.9 -5.0 -11.9 +CCT -10.8 -9.6 -11.9 -7.7 -3.3 -3.8 -4.5 -1.6 -11.7 -7.7 -12.0 -6.2 -9.6 -12.0 -9.8 -9.4 -3.2 -4.7 -4.6 -2.4 10.5 10.6 10.4 12.6 -8.2 -7.9 -9.0 -4.7 -6.1 -6.0 -7.5 -2.9 -10.9 -12.2 -11.2 -10.9 -2.4 -2.9 -2.7 -0.9 -10.4 -9.7 -10.2 -8.0 -8.4 -9.1 -9.3 -6.5 -50.0 -13.3 -50.0 -11.0 -0.0 -0.4 -0.4 2.3 -50.0 -10.9 -17.2 -8.3 -8.3 -12.7 -7.8 -9.3 +CGA 2.1 -8.1 1.2 -7.0 -8.0 -9.5 -7.3 -9.9 10.5 -5.5 9.3 -5.7 -9.5 -15.1 -10.0 -12.9 2.5 -1.2 -0.1 -1.0 -6.0 -8.1 -5.7 -8.2 13.8 11.3 11.8 12.1 -6.0 -9.1 -9.0 -9.5 -8.9 -13.4 -10.0 -13.1 -9.9 -11.7 -9.8 -12.8 -5.9 -9.4 -6.5 -8.1 -10.7 -14.0 -13.3 -13.9 -50.0 -11.9 -50.0 -9.7 -8.9 -10.8 -9.0 -11.1 -50.0 -6.5 -5.3 -6.3 -10.5 -16.2 -10.4 -15.2 +CGC 0.0 -5.0 0.5 -6.8 -9.1 -7.8 -7.5 -8.9 7.7 -2.4 8.2 -5.1 -11.9 -13.1 -10.4 -14.3 -0.8 3.0 -0.9 0.8 -8.3 -5.2 -5.8 -7.9 11.3 15.0 11.2 12.8 -9.7 -5.3 -9.2 -7.8 -11.7 -10.6 -10.1 -12.2 -11.7 -8.8 -8.9 -11.8 -9.3 -5.0 -8.0 -8.2 -13.6 -11.4 -12.8 -13.0 -50.0 -7.2 -50.0 -8.4 -11.0 -7.7 -8.7 -10.2 -50.0 -1.5 -7.1 -4.9 -11.2 -11.6 -11.6 -15.0 +CGG 1.4 -7.3 2.2 -7.1 -8.0 -9.3 -6.4 -9.6 9.1 -5.5 10.0 -5.4 -10.9 -14.3 -8.4 -14.0 0.9 -1.1 2.4 -1.3 -6.9 -8.0 -3.5 -9.0 11.8 11.2 13.4 11.4 -7.3 -8.8 -6.5 -8.5 -10.0 -12.5 -7.8 -12.5 -9.6 -11.0 -7.1 -10.7 -7.9 -8.0 -4.3 -9.4 -11.4 -12.3 -10.9 -12.9 -50.0 -11.8 -50.0 -11.2 -9.4 -9.8 -7.3 -10.5 -50.0 -6.2 -2.2 -6.7 -9.8 -15.2 -8.5 -14.9 +CGT 0.2 -6.3 0.1 -4.3 -8.2 -8.9 -8.5 -7.0 8.5 -4.1 8.0 -2.1 -10.5 -13.4 -10.2 -11.9 0.2 1.6 -0.8 3.4 -7.5 -6.7 -6.1 -4.7 12.1 12.8 11.4 14.7 -8.7 -7.4 -9.4 -5.3 -9.9 -11.8 -10.5 -10.3 -9.9 -10.1 -9.3 -9.3 -8.1 -7.2 -8.0 -5.2 -12.0 -12.1 -12.6 -10.9 -50.0 -7.9 -50.0 -5.3 -9.3 -8.8 -8.9 -7.0 -50.0 -3.6 -7.1 -1.2 -10.2 -12.7 -9.9 -11.0 +CTA -10.2 -13.4 -11.8 -12.4 -6.0 -9.4 -7.1 -8.6 -9.8 -12.9 -10.8 -12.4 2.3 -1.4 1.0 -1.0 -3.0 -8.0 -5.1 -7.6 -2.2 -6.1 -3.6 -6.1 -6.0 -9.7 -7.3 -8.7 11.2 7.9 8.9 8.1 -12.1 -17.4 -13.2 -16.8 -6.3 -9.5 -7.5 -8.8 -12.7 -15.2 -13.1 -14.2 0.0 -3.1 -1.8 -3.1 -50.0 -10.7 -50.0 -9.3 -4.6 -8.9 -5.9 -8.3 -50.0 -12.8 -9.6 -10.6 9.5 -3.2 8.2 -2.8 +CTC -13.5 -11.3 -14.0 -12.9 -9.3 -7.1 -8.2 -8.9 -12.7 -10.3 -11.9 -12.3 -0.4 1.4 -0.6 -0.4 -7.2 -4.7 -6.9 -5.7 -7.4 -3.7 -6.5 -6.0 -9.1 -5.3 -8.8 -7.4 7.9 11.9 7.8 9.3 -15.2 -15.1 -14.6 -17.2 -9.7 -7.3 -8.2 -9.2 -15.0 -12.8 -14.1 -14.9 -2.8 0.2 -2.9 -2.3 -50.0 -7.4 -50.0 -8.7 -9.4 -7.3 -9.4 -9.4 -50.0 -8.0 -11.9 -9.7 6.5 0.5 6.4 -1.8 +CTG -13.0 -14.4 -11.9 -13.9 -7.7 -9.0 -6.7 -8.8 -11.7 -12.7 -10.7 -13.1 -0.2 -1.6 1.7 -1.6 -5.7 -8.7 -4.8 -8.1 -5.3 -7.5 -3.3 -7.5 -9.0 -9.2 -6.5 -9.4 8.9 7.8 10.1 7.8 -14.3 -17.7 -13.0 -17.8 -8.0 -9.4 -6.2 -9.2 -15.1 -14.9 -12.2 -14.9 -1.8 -3.2 -0.8 -3.2 -50.0 -10.8 -50.0 -9.9 -7.3 -9.6 -6.4 
-9.4 -50.0 -12.3 -8.7 -11.3 7.6 -3.3 8.7 -2.9 +CTT -12.5 -12.9 -13.4 -10.0 -8.5 -9.0 -7.8 -6.8 -11.8 -11.3 -12.4 -10.1 -0.3 -0.5 -0.5 1.2 -6.1 -6.0 -6.7 -3.5 -5.9 -5.6 -5.5 -2.9 -9.5 -7.8 -8.5 -5.3 8.1 9.3 7.8 11.8 -14.4 -15.7 -14.5 -14.6 -8.4 -8.7 -7.9 -7.1 -14.3 -13.8 -13.8 -12.3 -2.4 -2.2 -2.9 -0.2 -50.0 -8.9 -50.0 -7.1 -8.1 -9.1 -8.7 -6.6 -50.0 -9.7 -11.4 -8.0 6.8 -1.8 6.9 0.4 +GAA -2.6 -6.3 -4.9 -4.9 -6.4 -9.4 -7.9 -8.6 -6.3 -7.4 -8.2 -6.6 -9.7 -14.9 -11.1 -13.9 -0.2 -8.2 -2.3 -6.8 -8.9 -11.1 -9.4 -10.9 -8.9 -11.7 -10.0 -9.9 -12.1 -15.2 -14.3 -14.4 11.1 2.9 9.2 3.5 -3.0 -7.0 -4.3 -6.2 -2.2 -6.6 -3.7 -5.7 -6.0 -10.4 -8.6 -9.5 -50.0 -15.5 -50.0 -13.9 -7.8 -10.5 -8.7 -9.6 -50.0 -16.5 -17.1 -14.8 -12.1 -18.9 -12.4 -17.4 +GAC -8.5 0.8 -9.1 -0.7 -10.1 -7.5 -9.1 -8.2 -11.4 -3.1 -11.8 -4.4 -15.5 -14.3 -14.8 -15.4 -6.6 -4.7 -7.1 -5.8 -12.5 -10.3 -11.0 -12.2 -13.4 -10.6 -12.5 -11.8 -17.4 -15.1 -17.7 -15.7 2.9 12.5 3.1 10.3 -7.7 -5.2 -6.2 -7.3 -5.4 -1.7 -5.4 -3.7 -11.6 -9.1 -13.0 -10.8 -50.0 -10.3 -50.0 -11.2 -9.8 -8.3 -9.1 -9.7 -50.0 -12.7 -20.6 -13.1 -16.0 -16.6 -17.0 -18.4 +GAG -5.0 -6.4 -3.4 -5.6 -7.5 -9.3 -7.1 -8.8 -8.8 -7.4 -6.9 -6.9 -11.4 -15.2 -9.7 -13.7 -1.8 -7.8 -0.8 -7.2 -9.4 -10.6 -8.7 -11.2 -10.0 -10.1 -7.8 -10.5 -13.2 -14.6 -13.0 -14.5 9.2 3.1 10.8 3.0 -4.5 -6.7 -2.7 -6.7 -5.0 -6.8 -2.5 -6.7 -7.5 -10.4 -7.4 -10.2 -50.0 -15.5 -50.0 -14.2 -8.9 -10.2 -8.5 -10.0 -50.0 -15.6 -15.3 -15.5 -13.0 -18.8 -12.6 -18.3 +GAT -8.1 -1.1 -8.9 1.4 -8.6 -9.2 -9.4 -6.8 -11.0 -4.9 -11.5 -2.6 -13.7 -16.8 -14.5 -12.6 -6.0 -6.7 -7.2 -4.2 -11.6 -11.9 -11.2 -10.9 -13.1 -12.2 -12.5 -10.3 -16.8 -17.2 -17.8 -14.6 3.5 10.3 3.0 12.4 -6.9 -7.5 -6.8 -4.9 -5.4 -4.2 -5.4 -1.3 -10.8 -11.5 -12.4 -8.2 -50.0 -12.5 -50.0 -8.9 -9.2 -10.0 -9.4 -7.9 -50.0 -14.3 -19.9 -11.4 -16.2 -19.2 -15.8 -15.9 +GCA -6.3 -7.4 -8.0 -6.0 2.8 -0.7 0.9 0.3 -7.5 -4.4 -8.4 -3.6 -3.0 -7.7 -3.7 -6.2 -5.3 -10.0 -7.2 -8.9 -0.7 -3.2 -1.8 -2.4 -9.9 -11.7 -9.6 -9.9 -6.3 -9.7 -8.0 -8.4 -3.0 -7.7 -4.5 -6.9 11.3 8.2 9.4 9.1 -1.0 -3.3 -1.8 -2.4 1.5 -2.1 -0.4 -1.1 -50.0 -14.5 -50.0 -12.9 2.8 -0.0 1.4 0.7 -50.0 -8.7 -14.1 -7.5 -5.2 -12.5 -6.5 -10.7 +GCC -9.9 -5.0 -10.0 -6.5 -0.7 2.6 -0.2 0.1 -10.3 -1.8 -9.9 -3.8 -6.4 -4.7 -5.8 -6.7 -8.2 -8.3 -8.0 -10.2 -3.9 -1.0 -2.7 -2.9 -11.7 -8.8 -11.0 -10.1 -9.5 -7.3 -9.4 -8.7 -7.0 -5.2 -6.7 -7.5 8.2 11.6 8.8 9.1 -3.7 -0.9 -3.1 -3.0 -2.0 1.2 -2.3 -1.3 -50.0 -12.1 -50.0 -12.6 0.1 2.5 0.7 0.6 -50.0 -6.0 -14.8 -7.5 -8.6 -9.4 -8.5 -11.2 +GCG -7.5 -6.2 -7.2 -6.2 0.9 0.2 2.5 0.2 -8.3 -3.5 -7.1 -3.7 -4.1 -6.3 -3.1 -6.3 -5.7 -8.9 -5.6 -8.4 -2.3 -2.2 0.7 -2.7 -9.8 -8.9 -7.1 -9.3 -7.5 -8.2 -6.2 -7.9 -4.3 -6.2 -2.7 -6.8 9.4 8.8 12.1 8.8 -2.1 -2.0 0.1 -2.4 0.0 -1.1 1.3 -0.8 -50.0 -11.9 -50.0 -12.6 1.2 0.7 3.5 0.8 -50.0 -7.1 -12.4 -7.1 -7.1 -10.6 -5.0 -10.6 +GCT -9.0 -6.5 -10.0 -4.8 0.1 -0.1 -0.3 2.4 -9.9 -3.5 -10.6 -2.0 -5.4 -6.6 -5.6 -4.4 -7.6 -9.9 -8.5 -8.2 -3.0 -2.8 -3.0 -0.9 -12.8 -11.8 -10.7 -9.3 -8.8 -9.2 -9.2 -7.1 -6.2 -7.3 -6.7 -4.9 9.1 9.1 8.8 11.2 -3.1 -3.0 -3.3 -1.0 -1.1 -1.4 -2.0 1.2 -50.0 -12.8 -50.0 -10.9 0.8 0.5 0.7 2.3 -50.0 -7.5 -16.5 -5.9 -7.8 -11.1 -7.8 -9.0 +GGA -7.1 -5.6 -9.1 -5.1 -5.9 -8.0 -6.9 -7.4 -1.9 -1.6 -4.7 -1.4 -9.6 -14.6 -10.4 -13.0 -6.7 -10.8 -9.4 -9.9 -9.1 -10.5 -9.1 -10.4 -5.9 -9.3 -7.9 -8.1 -12.7 -15.0 -15.1 -14.3 -2.2 -5.4 -5.0 -5.4 -1.0 -3.7 -2.1 -3.1 12.8 9.6 11.1 10.1 -4.8 -9.4 -8.1 -8.6 -50.0 -18.3 -50.0 -15.4 -5.9 -7.5 -6.6 -7.1 -50.0 -9.5 -11.7 -8.4 -12.5 -17.3 -13.2 -15.4 +GGC -10.2 -1.6 -10.2 -3.2 -7.3 -4.6 -6.5 -6.2 -6.7 3.0 -6.8 0.8 -11.9 -10.8 -11.5 -12.2 -9.6 -7.1 -9.7 -8.4 -9.8 -8.1 
-8.2 -9.7 -9.4 -5.0 -8.0 -7.2 -15.2 -12.8 -14.9 -13.8 -6.6 -1.7 -6.8 -4.2 -3.3 -0.9 -2.0 -3.0 9.6 12.8 9.7 10.5 -8.5 -5.6 -9.3 -7.8 -50.0 -12.7 -50.0 -13.8 -6.8 -5.1 -5.3 -6.5 -50.0 -4.1 -12.8 -5.8 -14.4 -14.1 -13.6 -14.8 +GGG -8.2 -4.7 -7.1 -5.0 -6.5 -7.7 -5.5 -6.9 -4.1 -0.8 -1.2 -0.9 -10.5 -12.8 -8.2 -13.0 -7.5 -10.0 -7.2 -9.7 -9.2 -9.4 -7.3 -10.2 -6.5 -8.0 -4.3 -8.0 -13.1 -14.1 -12.2 -13.8 -3.7 -5.4 -2.5 -5.4 -1.8 -3.1 0.1 -3.3 11.1 9.7 12.9 9.8 -5.5 -8.1 -5.4 -7.8 -50.0 -16.4 -50.0 -14.8 -5.9 -7.1 -4.7 -7.2 -50.0 -8.4 -6.4 -8.1 -12.1 -16.2 -10.3 -15.8 +GGT -9.2 -3.0 -9.9 -1.2 -6.5 -6.1 -6.8 -4.4 -6.4 0.9 -7.0 3.0 -11.8 -13.0 -11.1 -10.4 -9.3 -9.2 -9.8 -6.4 -9.5 -9.5 -9.4 -8.0 -8.1 -8.2 -9.4 -5.2 -14.2 -14.9 -14.9 -12.3 -5.7 -3.7 -6.7 -1.3 -2.4 -3.0 -2.4 -1.0 10.1 10.5 9.8 13.1 -7.2 -8.0 -8.4 -5.0 -50.0 -14.7 -50.0 -11.4 -6.3 -6.7 -5.8 -5.0 -50.0 -6.1 -13.4 -3.6 -13.0 -16.0 -13.7 -13.3 +GTA -8.2 -10.8 -10.1 -9.8 -0.4 -3.6 -1.7 -2.9 -7.9 -8.4 -9.8 -8.2 6.2 2.3 0.6 2.9 -7.1 -11.6 -9.2 -11.1 -5.6 -8.3 -7.2 -8.4 -10.7 -13.6 -11.4 -12.0 0.0 -2.8 -1.8 -2.4 -6.0 -11.6 -7.5 -10.8 1.5 -2.0 0.0 -1.1 -4.8 -8.5 -5.5 -7.2 11.9 8.6 10.0 9.0 -50.0 -14.2 -50.0 -12.8 -4.1 -7.4 -5.7 -6.5 -50.0 -11.0 -14.3 -9.9 1.4 -7.9 -0.7 -6.8 +GTC -12.5 -8.7 -13.0 -9.9 -3.9 -1.0 -3.4 -2.9 -12.5 -6.3 -11.6 -8.1 3.3 6.0 -1.6 3.8 -10.5 -10.1 -11.1 -11.1 -9.0 -6.9 -8.2 -9.1 -14.0 -11.4 -12.3 -12.1 -3.1 0.2 -3.2 -2.2 -10.4 -9.1 -10.4 -11.5 -2.1 1.2 -1.1 -1.4 -9.4 -5.6 -8.1 -8.0 8.6 12.4 8.6 9.5 -50.0 -10.4 -50.0 -11.6 -7.2 -5.7 -7.5 -7.4 -50.0 -8.6 -15.7 -9.7 -3.0 -3.7 -3.2 -5.9 +GTG -11.1 -11.9 -10.6 -11.3 -2.6 -4.1 -1.7 -3.5 -11.4 -9.6 -9.6 -9.2 3.7 2.5 1.6 2.5 -9.6 -12.3 -9.5 -11.8 -8.0 -9.4 -7.2 -9.3 -13.3 -12.8 -10.9 -12.6 -1.8 -2.9 -0.8 -2.9 -8.6 -13.0 -7.4 -12.4 -0.4 -2.3 1.3 -2.0 -8.1 -9.3 -5.4 -8.4 10.0 8.6 11.4 8.9 -50.0 -13.7 -50.0 -13.2 -6.1 -8.1 -5.5 -7.5 -50.0 -11.3 -12.7 -10.1 -1.6 -7.8 -0.3 -7.2 +GTT -11.4 -10.0 -12.7 -8.2 -3.1 -3.1 -2.8 -0.9 -11.7 -7.3 -12.1 -5.8 3.6 3.6 -1.1 5.8 -9.4 -11.9 -10.7 -10.0 -8.5 -8.8 -8.2 -6.5 -13.9 -13.0 -12.9 -10.9 -3.1 -2.3 -3.2 -0.2 -9.5 -10.8 -10.2 -8.2 -1.1 -1.3 -0.8 1.2 -8.6 -7.8 -7.8 -5.0 9.0 9.5 8.9 12.0 -50.0 -13.0 -50.0 -10.3 -6.2 -7.0 -7.1 -5.4 -50.0 -9.3 -14.0 -8.2 -2.4 -6.3 -2.4 -3.9 +TAA -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 33.3 -50.0 30.6 -50.0 -50.0 -50.0 -50.0 -50.0 29.2 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 +TAC -14.8 -6.2 -14.9 -7.6 -14.6 -11.6 -13.0 -12.2 -14.0 -9.0 -14.1 -10.1 -13.6 -11.2 -12.4 -12.5 -8.1 2.2 -8.7 0.4 -15.3 -11.3 -13.2 -13.3 -11.9 -7.2 -11.8 -7.9 -10.7 -7.4 -10.8 -8.9 -15.5 -10.3 -15.5 -12.5 -14.5 -12.1 -11.9 -12.8 -18.3 -12.7 -16.4 -14.7 -14.2 -10.4 -13.7 -13.0 -50.0 15.1 -50.0 13.3 -9.9 -5.8 -8.9 -7.6 -50.0 -1.5 -7.8 -3.0 -8.1 3.6 -9.4 2.0 +TAG -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 30.6 -50.0 35.2 -50.0 -50.0 -50.0 -50.0 -50.0 28.5 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 +TAT -13.8 -7.5 -14.4 -5.1 -12.6 -11.6 -11.5 -10.1 -13.2 -9.7 -12.7 -7.4 -11.9 -11.9 -11.5 -10.2 -7.2 0.9 -8.6 2.6 -14.1 -12.8 -13.3 -11.0 -9.7 -8.4 -11.2 -5.3 -9.3 -8.7 
-9.9 -7.1 -13.9 -11.2 -14.2 -8.9 -12.9 -12.6 -12.6 -10.9 -15.4 -13.8 -14.8 -11.4 -12.8 -11.6 -13.2 -10.3 -50.0 13.3 -50.0 15.2 -8.6 -7.0 -8.4 -4.8 -50.0 -3.0 -7.1 -0.1 -7.3 2.3 -8.7 3.9 +TCA -7.3 -6.3 -8.9 -5.5 2.9 0.2 1.5 1.2 -8.5 -2.3 -10.0 -1.7 -5.6 -10.5 -5.5 -9.6 -4.3 -7.9 -6.1 -7.2 2.3 -0.6 0.5 -0.0 -8.9 -11.0 -9.4 -9.3 -4.6 -9.4 -7.3 -8.1 -7.8 -9.8 -8.9 -9.2 2.8 0.1 1.2 0.8 -5.9 -6.8 -5.9 -6.3 -4.1 -7.2 -6.1 -6.2 -50.0 -9.9 -50.0 -8.6 12.5 9.4 11.0 9.8 -50.0 -4.8 -8.7 -3.5 -0.0 -8.6 -2.1 -7.0 +TCC -10.1 -4.3 -10.8 -5.8 0.1 2.3 0.7 0.6 -10.4 -0.2 -10.8 -2.0 -8.8 -9.2 -8.0 -9.7 -6.8 -5.9 -7.5 -7.0 -1.4 2.2 -0.9 -0.4 -10.8 -7.7 -9.8 -8.8 -8.9 -7.3 -9.6 -9.1 -10.5 -8.3 -10.2 -10.0 -0.0 2.5 0.7 0.5 -7.5 -5.1 -7.1 -6.7 -7.4 -5.7 -8.1 -7.0 -50.0 -5.8 -50.0 -7.0 9.4 12.7 10.1 10.0 -50.0 -1.5 -11.3 -3.3 -5.4 -4.2 -5.6 -6.8 +TCG -8.4 -6.2 -8.9 -5.9 1.8 0.4 2.7 1.2 -9.1 -1.2 -8.8 -1.5 -7.2 -10.5 -4.9 -9.6 -5.7 -7.4 -5.5 -7.4 0.3 -0.3 1.8 -0.4 -9.0 -8.7 -7.3 -8.9 -5.9 -9.4 -6.4 -8.7 -8.7 -9.1 -8.5 -9.4 1.4 0.7 3.5 0.7 -6.6 -5.3 -4.7 -5.8 -5.7 -7.5 -5.5 -7.1 -50.0 -8.9 -50.0 -8.4 11.0 10.1 13.2 10.2 -50.0 -4.2 -6.1 -3.6 -3.5 -8.4 -1.2 -6.8 +TCT -9.1 -5.4 -10.4 -4.2 1.0 0.5 0.6 2.4 -9.6 -1.5 -10.5 -0.5 -8.9 -10.5 -7.2 -8.3 -6.3 -6.9 -7.7 -5.1 -0.5 0.1 -0.6 2.3 -11.1 -10.2 -10.5 -7.0 -8.3 -9.4 -9.4 -6.6 -9.6 -9.7 -10.0 -7.9 0.7 0.6 0.8 2.3 -7.1 -6.5 -7.2 -5.0 -6.5 -7.4 -7.5 -5.4 -50.0 -7.6 -50.0 -4.8 9.8 10.0 10.2 12.1 -50.0 -3.1 -11.4 -0.7 -4.6 -6.6 -4.9 -3.9 +TGA -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 29.2 -50.0 28.5 -50.0 -50.0 -50.0 -50.0 -50.0 33.3 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 +TGC -13.0 -7.0 -13.1 -8.3 -9.2 -6.5 -8.9 -7.6 -9.8 -0.7 -9.2 -2.1 -12.4 -10.3 -12.4 -11.6 -9.5 -4.6 -10.4 -5.7 -13.6 -10.1 -11.4 -10.9 -6.5 -1.5 -6.2 -3.6 -12.8 -8.0 -12.3 -9.7 -16.5 -12.7 -15.6 -14.3 -8.7 -6.0 -7.1 -7.5 -9.5 -4.1 -8.4 -6.1 -11.0 -8.6 -11.3 -9.3 -50.0 -1.5 -50.0 -3.0 -4.8 -1.5 -4.2 -3.1 -50.0 16.4 -5.1 14.2 -10.1 -4.3 -10.0 -5.4 +TGG -13.5 -16.3 -11.8 -15.5 -13.6 -15.3 -12.1 -16.1 -7.7 -13.3 -4.2 -12.6 -14.1 -16.2 -10.3 -15.4 -9.6 -11.6 -7.7 -9.8 -14.3 -17.5 -11.1 -17.2 -5.3 -7.1 -2.2 -7.1 -9.6 -11.9 -8.7 -11.4 -17.1 -20.6 -15.3 -19.9 -14.1 -14.8 -12.4 -16.5 -11.7 -12.8 -6.4 -13.4 -14.3 -15.7 -12.7 -14.0 -50.0 -7.8 -50.0 -7.1 -8.7 -11.3 -6.1 -11.4 -50.0 -5.1 18.6 -4.8 -8.6 -8.1 -4.5 -7.3 +TGT -12.4 -8.2 -13.0 -5.9 -7.7 -7.8 -7.4 -5.8 -8.8 -2.0 -9.3 -0.4 -11.6 -11.3 -11.6 -9.4 -8.9 -5.6 -10.0 -3.2 -11.5 -11.1 -10.5 -8.3 -6.3 -4.9 -6.7 -1.2 -10.6 -9.7 -11.3 -8.0 -14.8 -13.1 -15.5 -11.4 -7.5 -7.5 -7.1 -5.9 -8.4 -5.8 -8.1 -3.6 -9.9 -9.7 -10.1 -8.2 -50.0 -3.0 -50.0 -0.1 -3.5 -3.3 -3.6 -0.7 -50.0 14.2 -4.8 16.4 -8.9 -5.8 -8.9 -3.4 +TTA -10.7 -13.2 -12.4 -11.6 -4.8 -8.4 -6.1 -7.2 -10.3 -11.8 -11.0 -11.6 2.8 -1.7 0.7 -1.1 -6.2 -9.7 -7.3 -8.4 -5.3 -8.6 -6.1 -8.3 -10.5 -11.2 -9.8 -10.2 9.5 6.5 7.6 6.8 -12.1 -16.0 -13.0 -16.2 -5.2 -8.6 -7.1 -7.8 -12.5 -14.4 -12.1 -13.0 1.4 -3.0 -1.6 -2.4 -50.0 -8.1 -50.0 -7.3 -0.0 -5.4 -3.5 -4.6 -50.0 -10.1 -8.6 -8.9 13.2 -0.9 9.7 -0.3 +TTC -18.1 -12.3 -19.4 -14.2 -12.2 -9.5 -11.5 -10.8 -17.9 -11.9 -18.2 -13.9 -6.4 -3.7 -6.8 -5.4 -12.8 -5.9 -13.6 -7.4 -13.7 -9.9 -12.9 -12.7 -16.2 -11.6 -15.2 -12.7 -3.2 0.5 -3.3 -1.8 -18.9 -16.6 -18.8 -19.2 -12.5 -9.4 -10.6 -11.1 -17.3 -14.1 -16.2 -16.0 -7.9 -3.7 -7.8 -6.3 -50.0 3.6 -50.0 
2.3 -8.6 -4.2 -8.4 -6.6 -50.0 -4.3 -8.1 -5.8 -0.9 14.2 -1.8 11.6 +TTG -11.8 -13.1 -11.5 -12.0 -6.2 -8.3 -5.4 -7.4 -11.8 -12.3 -11.1 -11.4 0.5 -2.5 2.4 -1.6 -6.8 -9.9 -6.4 -8.9 -6.1 -8.6 -5.0 -7.8 -10.4 -11.6 -8.5 -9.9 8.2 6.4 8.7 6.9 -12.4 -17.0 -12.6 -15.8 -6.5 -8.5 -5.0 -7.8 -13.2 -13.6 -10.3 -13.7 -0.7 -3.2 -0.3 -2.4 -50.0 -9.4 -50.0 -8.7 -2.1 -5.6 -1.2 -4.9 -50.0 -10.0 -4.5 -8.9 9.7 -1.8 11.3 -0.9 +TTT -17.2 -13.3 -17.8 -11.6 -11.0 -11.1 -10.4 -9.0 -16.0 -12.7 -16.1 -11.1 -5.2 -5.4 -6.0 -3.3 -11.8 -7.3 -13.3 -5.9 -12.7 -11.9 -11.9 -9.3 -15.2 -15.0 -14.9 -11.0 -2.8 -1.8 -2.9 0.4 -17.4 -18.4 -18.3 -15.9 -10.7 -11.2 -10.6 -9.0 -15.4 -14.8 -15.8 -13.3 -6.8 -5.9 -7.2 -3.9 -50.0 2.0 -50.0 3.9 -7.0 -6.8 -6.8 -3.9 -50.0 -5.4 -7.3 -3.4 -0.3 11.6 -0.9 14.1 diff --git a/code/lib/Bio/Align/substitution_matrices/data/STR b/code/lib/Bio/Align/substitution_matrices/data/STR new file mode 100644 index 0000000..23189c3 --- /dev/null +++ b/code/lib/Bio/Align/substitution_matrices/data/STR @@ -0,0 +1,26 @@ +# Steven Henikoff and Jorja G. Henikoff: +# "Performance evaluation of amino acid substitution matrices." +# Proteins: Structure, Function, and Genetics: 17(1): 49-61 (1993). +# Figure 1, lower triangle. +# PMID 8234244 + A C D E F G H I K L M N P Q R S T V W Y +A 4 -2 -1 0 -3 0 -2 -2 -1 -2 0 -1 -1 0 -1 0 -1 0 -3 -3 +C -2 11 -7 -3 -2 -6 -6 -4 -4 -6 -5 -6 -8 -3 -2 -4 -5 -4 -6 -6 +D -1 -7 6 2 -5 -1 0 -3 -1 -6 -4 2 -1 0 -2 0 -1 -4 -6 -3 +E 0 -3 2 5 -4 -2 -2 -3 1 -4 -2 0 -1 2 0 -1 0 -2 -6 -2 +F -3 -2 -5 -4 7 -6 -2 1 -3 2 0 -3 -5 -4 -4 -3 -3 -1 2 3 +G 0 -6 -1 -2 -6 5 -3 -5 -3 -5 -4 -1 -2 -2 -2 -1 -3 -4 -4 -3 +H -2 -6 0 -2 -2 -3 8 -5 0 -3 -2 2 -3 0 0 -2 -2 -2 -3 0 +I -2 -4 -3 -3 1 -5 -5 6 -3 2 1 -3 -4 -5 -3 -3 -2 2 -2 -1 +K -1 -4 -1 1 -3 -3 0 -3 5 -2 -1 0 -1 1 2 -1 0 -3 -3 -2 +L -2 -6 -6 -4 2 -5 -3 2 -2 5 3 -3 -3 -3 -3 -4 -3 1 -1 -2 +M 0 -5 -4 -2 0 -4 -2 1 -1 3 8 -2 -6 1 -4 -4 -2 0 -2 -1 +N -1 -6 2 0 -3 -1 2 -3 0 -3 -2 5 -2 0 -1 0 0 -4 -5 -1 +P -1 -8 -1 -1 -5 -2 -3 -4 -1 -3 -6 -2 7 -2 -2 -1 -1 -4 -4 -6 +Q 0 -3 0 2 -4 -2 0 -5 1 -3 1 0 -2 6 1 -1 0 -2 -5 -3 +R -1 -2 -2 0 -4 -2 0 -3 2 -3 -4 -1 -2 1 7 0 -1 -3 -2 -1 +S 0 -4 0 -1 -3 -1 -2 -3 -1 -4 -4 0 -1 -1 0 4 1 -3 -5 -2 +T -1 -5 -1 0 -3 -3 -2 -2 0 -3 -2 0 -1 0 -1 1 5 -1 -5 -2 +V 0 -4 -4 -2 -1 -4 -2 2 -3 1 0 -4 -4 -2 -3 -3 -1 5 -4 -1 +W -3 -6 -6 -6 2 -4 -3 -2 -3 -1 -2 -5 -4 -5 -2 -5 -5 -4 10 2 +Y -3 -6 -3 -2 3 -3 0 -1 -2 -2 -1 -1 -6 -3 -1 -2 -2 -1 2 7 diff --git a/code/lib/Bio/Align/substitution_matrices/data/TRANS b/code/lib/Bio/Align/substitution_matrices/data/TRANS new file mode 100644 index 0000000..611e6b9 --- /dev/null +++ b/code/lib/Bio/Align/substitution_matrices/data/TRANS @@ -0,0 +1,12 @@ +# David Wheeler, +# Department of Cell Biology, Baylor College of Medicine, Houston, Texas: +# "Weight matrices for sequence similarity scoring." +# Version 2.0, May 1996. +# David Wheeler defined the Transition/Transversion Matrix as a penalty +# matrix; the matrix below is a similarity matrix where +# similarity = 5 - penalty. + A T C G +A 5 0 0 4 +T 0 5 4 0 +C 0 4 5 0 +G 4 0 0 5 diff --git a/code/lib/Bio/AlignIO/ClustalIO.py b/code/lib/Bio/AlignIO/ClustalIO.py new file mode 100644 index 0000000..49fc51a --- /dev/null +++ b/code/lib/Bio/AlignIO/ClustalIO.py @@ -0,0 +1,305 @@ +# Copyright 2006-2016 by Peter Cock. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". 
+# Please see the LICENSE file that should have been included as part of this +# package. +"""Bio.AlignIO support for "clustal" output from CLUSTAL W and other tools. + +You are expected to use this module via the Bio.AlignIO functions (or the +Bio.SeqIO functions if you want to work directly with the gapped sequences). +""" +from Bio.Align import MultipleSeqAlignment +from Bio.AlignIO.Interfaces import AlignmentIterator +from Bio.AlignIO.Interfaces import SequentialAlignmentWriter +from Bio.Seq import Seq +from Bio.SeqRecord import SeqRecord + + +class ClustalWriter(SequentialAlignmentWriter): + """Clustalw alignment writer.""" + + def write_alignment(self, alignment): + """Use this to write (another) single alignment to an open file.""" + if len(alignment) == 0: + raise ValueError("Must have at least one sequence") + if alignment.get_alignment_length() == 0: + # This doubles as a check for an alignment object + raise ValueError("Non-empty sequences are required") + + # Old versions of the parser in Bio.Clustalw used a ._version property + try: + version = str(alignment._version) + except AttributeError: + version = "" + if not version: + version = "1.81" + if version.startswith("2."): + # e.g. 2.0.x + output = "CLUSTAL %s multiple sequence alignment\n\n\n" % version + else: + # e.g. 1.81 or 1.83 + output = "CLUSTAL X (%s) multiple sequence alignment\n\n\n" % version + + cur_char = 0 + max_length = len(alignment[0]) + + if max_length <= 0: + raise ValueError("Non-empty sequences are required") + + if "clustal_consensus" in alignment.column_annotations: + star_info = alignment.column_annotations["clustal_consensus"] + else: + try: + # This was originally stored by Bio.Clustalw as ._star_info + star_info = alignment._star_info + except AttributeError: + star_info = None + + # keep displaying sequences until we reach the end + while cur_char != max_length: + # calculate the number of sequences to show, which will + # be less if we are at the end of the sequence + if (cur_char + 50) > max_length: + show_num = max_length - cur_char + else: + show_num = 50 + + # go through all of the records and print out the sequences + # when we output, we do a nice 80 column output, although this + # may result in truncation of the ids. + for record in alignment: + # Make sure we don't get any spaces in the record + # identifier when output in the file by replacing + # them with underscores: + line = record.id[0:30].replace(" ", "_").ljust(36) + line += str(record.seq[cur_char : (cur_char + show_num)]) + output += line + "\n" + + # now we need to print out the star info, if we've got it + if star_info: + output += ( + (" " * 36) + star_info[cur_char : (cur_char + show_num)] + "\n" + ) + + output += "\n" + cur_char += show_num + + # Want a trailing blank new line in case the output is concatenated + self.handle.write(output + "\n") + + +class ClustalIterator(AlignmentIterator): + """Clustalw alignment iterator.""" + + _header = None # for caching lines between __next__ calls + + def __next__(self): + """Parse the next alignment from the handle.""" + handle = self.handle + + if self._header is None: + line = handle.readline() + else: + # Header we saved from when we were parsing + # the previous alignment. 
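+            # (The parser supports concatenated alignments in one file:
+            # when the next CLUSTAL header is met mid-stream it is stashed
+            # in self._header for this branch to pick up.)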
+ line = self._header + self._header = None + + if not line: + raise StopIteration + + # Whitelisted headers we know about + known_headers = ["CLUSTAL", "PROBCONS", "MUSCLE", "MSAPROBS", "Kalign"] + if line.strip().split()[0] not in known_headers: + raise ValueError( + "%s is not a known CLUSTAL header: %s" + % (line.strip().split()[0], ", ".join(known_headers)) + ) + + # find the clustal version in the header line + version = None + for word in line.split(): + if word[0] == "(" and word[-1] == ")": + word = word[1:-1] + if word[0] in "0123456789": + version = word + break + + # There should be two blank lines after the header line + line = handle.readline() + while line.strip() == "": + line = handle.readline() + + # If the alignment contains entries with the same sequence + # identifier (not a good idea - but seems possible), then this + # dictionary based parser will merge their sequences. Fix this? + ids = [] + seqs = [] + consensus = "" + seq_cols = None # Used to extract the consensus + + # Use the first block to get the sequence identifiers + while True: + if line[0] != " " and line.strip() != "": + # Sequences identifier... + fields = line.rstrip().split() + + # We expect there to be two fields, there can be an optional + # "sequence number" field containing the letter count. + if len(fields) < 2 or len(fields) > 3: + raise ValueError("Could not parse line:\n%s" % line) + + ids.append(fields[0]) + seqs.append(fields[1]) + + # Record the sequence position to get the consensus + if seq_cols is None: + start = len(fields[0]) + line[len(fields[0]) :].find(fields[1]) + end = start + len(fields[1]) + seq_cols = slice(start, end) + del start, end + assert fields[1] == line[seq_cols] + + if len(fields) == 3: + # This MAY be an old style file with a letter count... + try: + letters = int(fields[2]) + except ValueError: + raise ValueError( + "Could not parse line, bad sequence number:\n%s" % line + ) from None + if len(fields[1].replace("-", "")) != letters: + raise ValueError( + "Could not parse line, invalid sequence number:\n%s" % line + ) + elif line[0] == " ": + # Sequence consensus line... + assert len(ids) == len(seqs) + assert len(ids) > 0 + assert seq_cols is not None + consensus = line[seq_cols] + assert not line[: seq_cols.start].strip() + assert not line[seq_cols.stop :].strip() + # Check for blank line (or end of file) + line = handle.readline() + assert line.strip() == "" + break + else: + # No consensus + break + line = handle.readline() + if not line: + break # end of file + + assert line.strip() == "" + assert seq_cols is not None + + # Confirm all same length + for s in seqs: + assert len(s) == len(seqs[0]) + if consensus: + assert len(consensus) == len(seqs[0]) + + # Loop over any remaining blocks... + done = False + while not done: + # There should be a blank line between each block. + # Also want to ignore any consensus line from the + # previous block. + while (not line) or line.strip() == "": + line = handle.readline() + if not line: + break # end of file + if not line: + break # end of file + + if line.split(None, 1)[0] in known_headers: + # Found concatenated alignment. + self._header = line + break + + for i in range(len(ids)): + if line[0] == " ": + raise ValueError("Unexpected line:\n%r" % line) + fields = line.rstrip().split() + + # We expect there to be two fields, there can be an optional + # "sequence number" field containing the letter count. 
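+                # (e.g. an illustrative line "seqA  ACTG--CA  6", where the
+                # trailing 6 counts the non-gap letters.)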
+ if len(fields) < 2 or len(fields) > 3: + raise ValueError("Could not parse line:\n%r" % line) + + if fields[0] != ids[i]: + raise ValueError( + "Identifiers out of order? Got '%s' but expected '%s'" + % (fields[0], ids[i]) + ) + + if fields[1] != line[seq_cols]: + start = len(fields[0]) + line[len(fields[0]) :].find(fields[1]) + if start != seq_cols.start: + raise ValueError("Old location %s -> %i:XX" % (seq_cols, start)) + end = start + len(fields[1]) + seq_cols = slice(start, end) + del start, end + + # Append the sequence + seqs[i] += fields[1] + assert len(seqs[i]) == len(seqs[0]) + + if len(fields) == 3: + # This MAY be an old style file with a letter count... + try: + letters = int(fields[2]) + except ValueError: + raise ValueError( + "Could not parse line, bad sequence number:\n%s" % line + ) from None + if len(seqs[i].replace("-", "")) != letters: + raise ValueError( + "Could not parse line, invalid sequence number:\n%s" % line + ) + + # Read in the next line + line = handle.readline() + # There should now be a consensus line + if consensus: + assert line[0] == " " + assert seq_cols is not None + consensus += line[seq_cols] + assert len(consensus) == len(seqs[0]) + assert not line[: seq_cols.start].strip() + assert not line[seq_cols.stop :].strip() + # Read in the next line + line = handle.readline() + + assert len(ids) == len(seqs) + if len(seqs) == 0 or len(seqs[0]) == 0: + raise StopIteration + + if ( + self.records_per_alignment is not None + and self.records_per_alignment != len(ids) + ): + raise ValueError( + "Found %i records in this alignment, told to expect %i" + % (len(ids), self.records_per_alignment) + ) + + records = (SeqRecord(Seq(s), id=i, description=i) for (i, s) in zip(ids, seqs)) + alignment = MultipleSeqAlignment(records) + # TODO - Handle alignment annotation better, for now + # mimic the old parser in Bio.Clustalw + if version: + alignment._version = version + if consensus: + alignment_length = len(seqs[0]) + if len(consensus) != alignment_length: + raise ValueError( + "Alignment length is %i, consensus length is %i, '%s'" + % (alignment_length, len(consensus), consensus) + ) + alignment.column_annotations["clustal_consensus"] = consensus + # For backward compatibility prior to .column_annotations: + alignment._star_info = consensus + return alignment diff --git a/code/lib/Bio/AlignIO/EmbossIO.py b/code/lib/Bio/AlignIO/EmbossIO.py new file mode 100644 index 0000000..b1ebd4d --- /dev/null +++ b/code/lib/Bio/AlignIO/EmbossIO.py @@ -0,0 +1,219 @@ +# Copyright 2008-2016 by Peter Cock. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +"""Bio.AlignIO support for "emboss" alignment output from EMBOSS tools. + +You are expected to use this module via the Bio.AlignIO functions (or the +Bio.SeqIO functions if you want to work directly with the gapped sequences). + +This module contains a parser for the EMBOSS pairs/simple file format, for +example from the alignret, water and needle tools. +""" +from Bio.Align import MultipleSeqAlignment +from Bio.AlignIO.Interfaces import AlignmentIterator +from Bio.Seq import Seq +from Bio.SeqRecord import SeqRecord + + +class EmbossIterator(AlignmentIterator): + """Emboss alignment iterator. + + For reading the (pairwise) alignments from EMBOSS tools in what they + call the "pairs" and "simple" formats. 
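+
+    A minimal usage sketch (the filename here is illustrative)::
+
+        from Bio import AlignIO
+
+        for alignment in AlignIO.parse("needle.txt", "emboss"):
+            print(alignment.annotations.get("score"))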
+ """ + + _header = None # for caching lines between __next__ calls + + def __next__(self): + """Parse the next alignment from the handle.""" + handle = self.handle + + if self._header is None: + line = handle.readline() + else: + # Header we saved from when we were parsing + # the previous alignment. + line = self._header + self._header = None + + if not line: + raise StopIteration + + while line.rstrip() != "#=======================================": + line = handle.readline() + if not line: + raise StopIteration + + length_of_seqs = None + number_of_seqs = None + ids = [] + header_dict = {} + + while line[0] == "#": + # Read in the rest of this alignment header, + # try and discover the number of records expected + # and their length + parts = line[1:].split(":", 1) + key = parts[0].lower().strip() + if key == "aligned_sequences": + number_of_seqs = int(parts[1].strip()) + assert len(ids) == 0 + # Should now expect the record identifiers... + for i in range(number_of_seqs): + line = handle.readline() + parts = line[1:].strip().split(":", 1) + assert i + 1 == int(parts[0].strip()) + ids.append(parts[1].strip()) + assert len(ids) == number_of_seqs + if key == "length": + length_of_seqs = int(parts[1].strip()) + + # Parse the rest of the header + if key == "identity": + header_dict["identity"] = int(parts[1].strip().split("/")[0]) + if key == "similarity": + header_dict["similarity"] = int(parts[1].strip().split("/")[0]) + if key == "gaps": + header_dict["gaps"] = int(parts[1].strip().split("/")[0]) + if key == "score": + header_dict["score"] = float(parts[1].strip()) + + # And read in another line... + line = handle.readline() + + if number_of_seqs is None: + raise ValueError("Number of sequences missing!") + if length_of_seqs is None: + raise ValueError("Length of sequences missing!") + + if ( + self.records_per_alignment is not None + and self.records_per_alignment != number_of_seqs + ): + raise ValueError( + "Found %i records in this alignment, told to expect %i" + % (number_of_seqs, self.records_per_alignment) + ) + + seqs = [""] * len(ids) + seq_starts = [] + index = 0 + + # Parse the seqs + while line: + if len(line) > 21: + id_start = line[:21].strip().split(None, 1) + seq_end = line[21:].strip().split(None, 1) + if len(id_start) == 2 and len(seq_end) == 2: + # identifier, seq start position, seq, seq end position + # (an aligned seq is broken up into multiple lines) + id, start = id_start + seq, end = seq_end + if start >= end: + # Special case, either a single letter is present, + # or no letters at all. + if seq.replace("-", "") == "": + start = int(start) + end = int(end) + else: + start = int(start) - 1 + end = int(end) + else: + assert seq.replace("-", "") != "", repr(line) + start = int(start) - 1 # python counting + end = int(end) + + if index < 0 or index >= number_of_seqs: + raise ValueError( + "Expected index %i in range [0,%i)" + % (index, number_of_seqs) + ) + # The identifier is truncated... + assert id == ids[index] or id == ids[index][: len(id)] + + if len(seq_starts) == index: + # Record the start + seq_starts.append(start) + + # Check the start... + if start >= end: + assert seq.replace("-", "") == "", line + elif start - seq_starts[index] != len(seqs[index].replace("-", "")): + raise ValueError( + "Found %i chars so far for sequence %i (%s, %r), line says start %i:\n%s" + % ( + len(seqs[index].replace("-", "")), + index, + id, + seqs[index], + start, + line, + ) + ) + seqs[index] += seq + + # Check the end ... 
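+                    # (end must equal the recorded start plus every non-gap
+                    # letter accumulated for this sequence so far.)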
+ if end != seq_starts[index] + len(seqs[index].replace("-", "")): + raise ValueError( + "Found %i chars so far for sequence %i (%s, %r, start=%i), file says end %i:\n%s" + % ( + len(seqs[index].replace("-", "")), + index, + id, + seqs[index], + seq_starts[index], + end, + line, + ) + ) + + index += 1 + if index >= number_of_seqs: + index = 0 + else: + # just a start value, this is just alignment annotation (?) + # print("Skipping: " + line.rstrip()) + pass + elif line.strip() == "": + # Just a spacer? + pass + else: + raise ValueError("Unrecognised EMBOSS pairwise line: %r\n" % line) + + line = handle.readline() + if ( + line.rstrip() == "#---------------------------------------" + or line.rstrip() == "#=======================================" + ): + # End of alignment + self._header = line + break + + assert index == 0 + + if ( + self.records_per_alignment is not None + and self.records_per_alignment != len(ids) + ): + raise ValueError( + "Found %i records in this alignment, told to expect %i" + % (len(ids), self.records_per_alignment) + ) + + records = [] + for id, seq in zip(ids, seqs): + if len(seq) != length_of_seqs: + # EMBOSS 2.9.0 is known to use spaces instead of minus signs + # for leading gaps, and thus fails to parse. This old version + # is still used as of Dec 2008 behind the EBI SOAP webservice: + # http://www.ebi.ac.uk/Tools/webservices/wsdl/WSEmboss.wsdl + raise ValueError( + "Error parsing alignment - sequences of " + "different length? You could be using an " + "old version of EMBOSS." + ) + records.append(SeqRecord(Seq(seq), id=id, description=id)) + return MultipleSeqAlignment(records, annotations=header_dict) diff --git a/code/lib/Bio/AlignIO/FastaIO.py b/code/lib/Bio/AlignIO/FastaIO.py new file mode 100644 index 0000000..9816253 --- /dev/null +++ b/code/lib/Bio/AlignIO/FastaIO.py @@ -0,0 +1,344 @@ +# Copyright 2008-2016 by Peter Cock. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +"""Bio.AlignIO support for "fasta-m10" output from Bill Pearson's FASTA tools. + +You are expected to use this module via the Bio.AlignIO functions (or the +Bio.SeqIO functions if you want to work directly with the gapped sequences). + +This module contains a parser for the pairwise alignments produced by Bill +Pearson's FASTA tools, for use from the Bio.AlignIO interface where it is +referred to as the "fasta-m10" file format (as we only support the machine +readable output format selected with the -m 10 command line option). + +This module does NOT cover the generic "fasta" file format originally +developed as an input format to the FASTA tools. The Bio.AlignIO and +Bio.SeqIO both use the Bio.SeqIO.FastaIO module to deal with these files, +which can also be used to store a multiple sequence alignments. +""" +from Bio.Align import MultipleSeqAlignment +from Bio.Seq import Seq +from Bio.SeqRecord import SeqRecord + + +def _extract_alignment_region(alignment_seq_with_flanking, annotation): + """Extract alignment region (PRIVATE). + + Helper function for the main parsing code. + + To get the actual pairwise alignment sequences, we must first + translate the un-gapped sequence based coordinates into positions + in the gapped sequence (which may have a flanking region shown + using leading - characters). 
To date, I have never seen any + trailing flanking region shown in the m10 file, but the + following code should also cope with that. + + Note that this code seems to work fine even when the "sq_offset" + entries are present as a result of using the -X command line option. + """ + align_stripped = alignment_seq_with_flanking.strip("-") + display_start = int(annotation["al_display_start"]) + if int(annotation["al_start"]) <= int(annotation["al_stop"]): + start = int(annotation["al_start"]) - display_start + end = int(annotation["al_stop"]) - display_start + 1 + else: + # FASTA has flipped this sequence... + start = display_start - int(annotation["al_start"]) + end = display_start - int(annotation["al_stop"]) + 1 + + end += align_stripped.count("-") + if start < 0 or start >= end or end > len(align_stripped): + raise ValueError( + "Problem with sequence start/stop,\n%s[%i:%i]\n%s" + % (alignment_seq_with_flanking, start, end, annotation) + ) + return align_stripped[start:end] + + +def FastaM10Iterator(handle, seq_count=None): + """Alignment iterator for the FASTA tool's pairwise alignment output. + + This is for reading the pairwise alignments output by Bill Pearson's + FASTA program when called with the -m 10 command line option for machine + readable output. For more details about the FASTA tools, see the website + http://fasta.bioch.virginia.edu/ and the paper: + + W.R. Pearson & D.J. Lipman PNAS (1988) 85:2444-2448 + + This class is intended to be used via the Bio.AlignIO.parse() function + by specifying the format as "fasta-m10" as shown in the following code:: + + from Bio import AlignIO + handle = ... + for a in AlignIO.parse(handle, "fasta-m10"): + assert len(a) == 2, "Should be pairwise!" + print("Alignment length %i" % a.get_alignment_length()) + for record in a: + print("%s %s %s" % (record.seq, record.name, record.id)) + + Note that this is not a full blown parser for all the information + in the FASTA output - for example, most of the header and all of the + footer is ignored. Also, the alignments are not batched according to + the input queries. + + Also note that there can be up to about 30 letters of flanking region + included in the raw FASTA output as contextual information. This is NOT + part of the alignment itself, and is not included in the resulting + MultipleSeqAlignment objects returned. + """ + state_PREAMBLE = -1 + state_NONE = 0 + state_QUERY_HEADER = 1 + state_ALIGN_HEADER = 2 + state_ALIGN_QUERY = 3 + state_ALIGN_MATCH = 4 + state_ALIGN_CONS = 5 + + def build_hsp(): + if not query_tags and not match_tags: + raise ValueError("No data for query %r, match %r" % (query_id, match_id)) + assert query_tags, query_tags + assert match_tags, match_tags + evalue = align_tags.get("fa_expect") + tool = global_tags.get("tool", "").upper() + + q = _extract_alignment_region(query_seq, query_tags) + if tool in ["TFASTX"] and len(match_seq) == len(q): + m = match_seq + # Quick hack until I can work out how -, * and / characters + # and the apparent mix of aa and bp coordinates works. + else: + m = _extract_alignment_region(match_seq, match_tags) + if len(q) != len(m): + raise ValueError( + f"""\ +Darn... amino acids vs nucleotide coordinates? +tool: {tool} +query_seq: {query_seq} +query_tags: {query_tags} +{q} length: {len(q)} +match_seq: {match_seq} +match_tags: {match_tags} +{m} length: {len(m)} +handle.name: {handle.name} +""" + ) + + annotations = {} + records = [] + + # Want to record both the query header tags, and the alignment tags. 
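+        # (align_tags is merged second, so per-HSP values overwrite any
+        # query-header values sharing the same key.)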
+ annotations.update(header_tags) + annotations.update(align_tags) + + # Query + # ===== + record = SeqRecord( + Seq(q), + id=query_id, + name="query", + description=query_descr, + annotations={"original_length": int(query_tags["sq_len"])}, + ) + # TODO - handle start/end coordinates properly. Short term hack for now: + record._al_start = int(query_tags["al_start"]) + record._al_stop = int(query_tags["al_stop"]) + + # TODO - Can FASTA output RNA? + if "sq_type" in query_tags: + if query_tags["sq_type"] == "D": + record.annotations["molecule_type"] = "DNA" + elif query_tags["sq_type"] == "p": + record.annotations["molecule_type"] = "protein" + + records.append(record) + + # Match + # ===== + record = SeqRecord( + Seq(m), + id=match_id, + name="match", + description=match_descr, + annotations={"original_length": int(match_tags["sq_len"])}, + ) + # TODO - handle start/end coordinates properly. Short term hack for now: + record._al_start = int(match_tags["al_start"]) + record._al_stop = int(match_tags["al_stop"]) + + if "sq_type" in match_tags: + if match_tags["sq_type"] == "D": + record.annotations["molecule_type"] = "DNA" + elif match_tags["sq_type"] == "p": + record.annotations["molecule_type"] = "protein" + + records.append(record) + + return MultipleSeqAlignment(records, annotations=annotations) + + state = state_PREAMBLE + query_id = None + match_id = None + query_descr = "" + match_descr = "" + global_tags = {} + header_tags = {} + align_tags = {} + query_tags = {} + match_tags = {} + query_seq = "" + match_seq = "" + cons_seq = "" + for line in handle: + if ">>>" in line and not line.startswith(">>>"): + if query_id and match_id: + # This happens on old FASTA output which lacked an end of + # query >>><<< marker line. + yield build_hsp() + state = state_NONE + query_descr = line[line.find(">>>") + 3 :].strip() + query_id = query_descr.split(None, 1)[0] + match_id = None + header_tags = {} + align_tags = {} + query_tags = {} + match_tags = {} + query_seq = "" + match_seq = "" + cons_seq = "" + elif line.startswith("!! No "): + # e.g. + # !! No library sequences with E() < 0.5 + # or on more recent versions, + # No sequences with E() < 0.05 + assert state == state_NONE + assert not header_tags + assert not align_tags + assert not match_tags + assert not query_tags + assert match_id is None + assert not query_seq + assert not match_seq + assert not cons_seq + query_id = None + elif line.strip() in [">>><<<", ">>>///"]: + # End of query, possible end of all queries + if query_id and match_id: + yield build_hsp() + state = state_NONE + query_id = None + match_id = None + header_tags = {} + align_tags = {} + query_tags = {} + match_tags = {} + query_seq = "" + match_seq = "" + cons_seq = "" + elif line.startswith(">>>"): + # Should be start of a match! + assert query_id is not None + assert line[3:].split(", ", 1)[0] == query_id, line + assert match_id is None + assert not header_tags + assert not align_tags + assert not query_tags + assert not match_tags + assert not match_seq + assert not query_seq + assert not cons_seq + state = state_QUERY_HEADER + elif line.startswith(">>"): + # Should now be at start of a match alignment! 
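+            # (A new ">>" header implies the previous HSP, if any, is
+            # complete, so emit it before resetting the per-match state.)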
+ if query_id and match_id: + yield build_hsp() + align_tags = {} + query_tags = {} + match_tags = {} + query_seq = "" + match_seq = "" + cons_seq = "" + match_descr = line[2:].strip() + match_id = match_descr.split(None, 1)[0] + state = state_ALIGN_HEADER + elif line.startswith(">--"): + # End of one HSP + assert query_id and match_id, line + yield build_hsp() + # Clean up read for next HSP + # but reuse header_tags + align_tags = {} + query_tags = {} + match_tags = {} + query_seq = "" + match_seq = "" + cons_seq = "" + state = state_ALIGN_HEADER + elif line.startswith(">"): + if state == state_ALIGN_HEADER: + # Should be start of query alignment seq... + assert query_id is not None, line + assert match_id is not None, line + assert query_id.startswith(line[1:].split(None, 1)[0]), line + state = state_ALIGN_QUERY + elif state == state_ALIGN_QUERY: + # Should be start of match alignment seq + assert query_id is not None, line + assert match_id is not None, line + assert match_id.startswith(line[1:].split(None, 1)[0]), line + state = state_ALIGN_MATCH + elif state == state_NONE: + # Can get > as the last line of a histogram + pass + else: + raise RuntimeError("state %i got %r" % (state, line)) + elif line.startswith("; al_cons"): + assert state == state_ALIGN_MATCH, line + state = state_ALIGN_CONS + # Next line(s) should be consensus seq... + elif line.startswith("; "): + if ": " in line: + key, value = [s.strip() for s in line[2:].split(": ", 1)] + else: + import warnings + from Bio import BiopythonParserWarning + + # Seen in lalign36, specifically version 36.3.4 Apr, 2011 + # Fixed in version 36.3.5b Oct, 2011(preload8) + warnings.warn( + "Missing colon in line: %r" % line, BiopythonParserWarning + ) + try: + key, value = [s.strip() for s in line[2:].split(" ", 1)] + except ValueError: + raise ValueError("Bad line: %r" % line) from None + if state == state_QUERY_HEADER: + header_tags[key] = value + elif state == state_ALIGN_HEADER: + align_tags[key] = value + elif state == state_ALIGN_QUERY: + query_tags[key] = value + elif state == state_ALIGN_MATCH: + match_tags[key] = value + else: + raise RuntimeError("Unexpected state %r, %r" % (state, line)) + elif state == state_ALIGN_QUERY: + query_seq += line.strip() + elif state == state_ALIGN_MATCH: + match_seq += line.strip() + elif state == state_ALIGN_CONS: + cons_seq += line.strip("\n") + elif state == state_PREAMBLE: + if line.startswith("#"): + global_tags["command"] = line[1:].strip() + elif line.startswith(" version "): + global_tags["version"] = line[9:].strip() + elif " compares a " in line: + global_tags["tool"] = line[: line.find(" compares a ")].strip() + elif " searches a " in line: + global_tags["tool"] = line[: line.find(" searches a ")].strip() + else: + pass diff --git a/code/lib/Bio/AlignIO/Interfaces.py b/code/lib/Bio/AlignIO/Interfaces.py new file mode 100644 index 0000000..b53de30 --- /dev/null +++ b/code/lib/Bio/AlignIO/Interfaces.py @@ -0,0 +1,160 @@ +# Copyright 2008-2018 by Peter Cock. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +"""AlignIO support module (not for general use). + +Unless you are writing a new parser or writer for Bio.AlignIO, you should not +use this module. It provides base classes to try and simplify things. 
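+
+A minimal subclass sketch (illustrative only, not a real Bio.AlignIO
+format)::
+
+    from Bio.AlignIO.Interfaces import AlignmentIterator
+
+    class OneBlockIterator(AlignmentIterator):
+        def __next__(self):
+            line = self.handle.readline()
+            if not line:
+                raise StopIteration
+            # ...parse one block into a MultipleSeqAlignment and return it...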
+""" + + +class AlignmentIterator: + """Base class for building MultipleSeqAlignment iterators. + + You should write a next() method to return Alignment + objects. You may wish to redefine the __init__ + method as well. + """ + + def __init__(self, handle, seq_count=None): + """Create an AlignmentIterator object. + + Arguments: + - handle - input file + - count - optional, expected number of records per alignment + Recommend for fasta file format. + + Note when subclassing: + - there should be a single non-optional argument, the handle, + and optional count IN THAT ORDER. + - you can add additional optional arguments. + + """ + self.handle = handle + self.records_per_alignment = seq_count + ##################################################### + # You may want to subclass this, for example # + # to read through the file to find the first record,# + # or if additional arguments are required. # + ##################################################### + + def __next__(self): + """Return the next alignment in the file. + + This method should be replaced by any derived class to do something + useful. + """ + raise NotImplementedError("This object should be subclassed") + ##################################################### + # You SHOULD subclass this, to split the file up # + # into your individual alignments and convert these # + # into MultipleSeqAlignment objects. # + ##################################################### + + def __iter__(self): + """Iterate over the entries as MultipleSeqAlignment objects. + + Example usage for (concatenated) PHYLIP files:: + + with open("many.phy","r") as myFile: + for alignment in PhylipIterator(myFile): + print("New alignment:") + for record in alignment: + print(record.id) + print(record.seq) + + """ + return iter(self.__next__, None) + + +class AlignmentWriter: + """Base class for building MultipleSeqAlignment writers. + + You should write a write_alignment() method. + You may wish to redefine the __init__ method as well. + """ + + def __init__(self, handle): + """Initialize the class.""" + self.handle = handle + + def write_file(self, alignments): + """Use this to write an entire file containing the given alignments. + + Arguments: + - alignments - A list or iterator returning MultipleSeqAlignment objects + + In general, this method can only be called once per file. + + This method should be replaced by any derived class to do something + useful. It should return the number of alignments.. + """ + raise NotImplementedError("This object should be subclassed") + ##################################################### + # You SHOULD subclass this, to write the alignment # + # objects to the file handle # + ##################################################### + + def clean(self, text): + """Use this to avoid getting newlines in the output.""" + return text.replace("\n", " ").replace("\r", " ") + + +class SequentialAlignmentWriter(AlignmentWriter): + """Base class for building MultipleSeqAlignment writers. + + This assumes each alignment can be simply appended to the file. + You should write a write_alignment() method. + You may wish to redefine the __init__ method as well. + """ + + def __init__(self, handle): + """Initialize the class.""" + self.handle = handle + + def write_file(self, alignments): + """Use this to write an entire file containing the given alignments. + + Arguments: + - alignments - A list or iterator returning MultipleSeqAlignment objects + + In general, this method can only be called once per file. 
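+
+        Returns the number of alignments written.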
+        """
+        self.write_header()
+        count = 0
+        for alignment in alignments:
+            self.write_alignment(alignment)
+            count += 1
+        self.write_footer()
+        return count
+
+    def write_header(self):
+        """Use this to write any header.
+
+        This method should be replaced by any derived class to do something
+        useful.
+        """
+        pass
+
+    def write_footer(self):
+        """Use this to write any footer.
+
+        This method should be replaced by any derived class to do something
+        useful.
+        """
+        pass
+
+    def write_alignment(self, alignment):
+        """Use this to write a single alignment.
+
+        This method should be replaced by any derived class to do something
+        useful.
+        """
+        raise NotImplementedError("This object should be subclassed")
+        #####################################################
+        # You SHOULD subclass this, to write the alignment  #
+        # objects to the file handle                        #
+        #####################################################
diff --git a/code/lib/Bio/AlignIO/MafIO.py b/code/lib/Bio/AlignIO/MafIO.py
new file mode 100644
index 0000000..787325e
--- /dev/null
+++ b/code/lib/Bio/AlignIO/MafIO.py
@@ -0,0 +1,833 @@
+# Copyright 2011, 2012 by Andrew Sczesnak. All rights reserved.
+# Revisions Copyright 2011, 2017 by Peter Cock. All rights reserved.
+# Revisions Copyright 2014, 2015 by Adam Novak. All rights reserved.
+# Revisions Copyright 2015, 2017 by Blaise Li. All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Bio.AlignIO support for the "maf" multiple alignment format.
+
+The Multiple Alignment Format, described by UCSC, stores a series of
+multiple alignments in a single file. It is suitable for whole-genome
+to whole-genome alignments; metadata such as source chromosome, start
+position, size, and strand can be stored.
+
+See http://genome.ucsc.edu/FAQ/FAQformat.html#format5
+
+You are expected to use this module via the Bio.AlignIO functions (or the
+Bio.SeqIO functions if you want to work directly with the gapped sequences).
+
+Coordinates in the MAF format are defined in terms of zero-based start
+positions (like Python) and aligning region sizes.
+
+A minimal aligned region of length one and starting at the first position in
+the source sequence would have ``start == 0`` and ``size == 1``.
+
+As we can see from this example, ``start + size`` will give one more than the
+zero-based end position. We can therefore manipulate ``start`` and
+``start + size`` as Python list slice boundaries.
+
+For an inclusive end coordinate, we need to use ``end = start + size - 1``.
+A 1-column wide alignment would have ``start == end``.
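+
+Worked example of the arithmetic above: a region with ``start == 3`` and
+``size == 4`` covers zero-based positions 3, 4, 5 and 6, i.e. the Python
+slice ``seq[3:3 + 4]``, and its inclusive end is ``3 + 4 - 1 == 6``.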
+""" +import os + +from itertools import islice +from sqlite3 import dbapi2 + +from Bio.Align import MultipleSeqAlignment +from Bio.Seq import Seq +from Bio.SeqRecord import SeqRecord + +from .Interfaces import SequentialAlignmentWriter + +MAFINDEX_VERSION = 2 + + +class MafWriter(SequentialAlignmentWriter): + """Accepts a MultipleSeqAlignment object, writes a MAF file.""" + + def write_header(self): + """Write the MAF header.""" + self.handle.write("##maf version=1 scoring=none\n") + self.handle.write("# generated by Biopython\n\n") + + def _write_record(self, record): + """Write a single SeqRecord object to an 's' line in a MAF block (PRIVATE).""" + # convert biopython-style 1/-1 strand to MAF-style +/- strand + if record.annotations.get("strand") == 1: + strand = "+" + elif record.annotations.get("strand") == -1: + strand = "-" + else: + # TODO: issue warning? + strand = "+" + + fields = [ + "s", + # In the MAF file format, spaces are not allowed in the id + "%-40s" % record.id.replace(" ", "_"), + "%15s" % record.annotations.get("start", 0), + "%5s" + % record.annotations.get("size", len(str(record.seq).replace("-", ""))), + strand, + "%15s" % record.annotations.get("srcSize", 0), + str(record.seq), + ] + self.handle.write("%s\n" % " ".join(fields)) + + def write_alignment(self, alignment): + """Write a complete alignment to a MAF block. + + Writes every SeqRecord in a MultipleSeqAlignment object to its own + MAF block (beginning with an 'a' line, containing 's' lines). + """ + if not isinstance(alignment, MultipleSeqAlignment): + raise TypeError("Expected an alignment object") + + if len({len(x) for x in alignment}) > 1: + raise ValueError("Sequences must all be the same length") + + # We allow multiple sequences with the same IDs; for example, there may + # be a MAF aligning the + and - strands of the same sequence together. + + # for now, use ._annotations private property, but restrict keys to those + # specifically supported by the MAF format, according to spec + try: + anno = " ".join( + [ + "%s=%s" % (x, y) + for x, y in alignment._annotations.items() + if x in ("score", "pass") + ] + ) + except AttributeError: + anno = "score=0.00" + + self.handle.write("a %s\n" % (anno,)) + + recs_out = 0 + + for record in alignment: + self._write_record(record) + + recs_out += 1 + + self.handle.write("\n") + + return recs_out + + +# Invalid function name according to pylint, but kept for compatibility +# with Bio* conventions. +def MafIterator(handle, seq_count=None): + """Iterate over a MAF file handle as MultipleSeqAlignment objects. + + Iterates over lines in a MAF file-like object (handle), yielding + MultipleSeqAlignment objects. SeqRecord IDs generally correspond to + species names. + """ + in_a_bundle = False + + annotations = [] + records = [] + + while True: + # allows parsing of the last bundle without duplicating code + try: + line = next(handle) + except StopIteration: + line = "" + + if in_a_bundle: + if line.startswith("s"): + # add a SeqRecord to the bundle + line_split = line.strip().split() + + if len(line_split) != 7: + raise ValueError( + "Error parsing alignment - 's' line must have 7 fields" + ) + + # convert MAF-style +/- strand to biopython-type 1/-1 + if line_split[4] == "+": + strand = 1 + elif line_split[4] == "-": + strand = -1 + else: + # TODO: issue warning, set to 0? 
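+                    # (MAF defines only "+" and "-", so any other value is
+                    # coerced to the forward strand here.)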
+ strand = 1 + + # s (literal), src (ID), start, size, strand, srcSize, text (sequence) + anno = { + "start": int(line_split[2]), + "size": int(line_split[3]), + "strand": strand, + "srcSize": int(line_split[5]), + } + + sequence = line_split[6] + + # interpret a dot/period to mean the same as the first sequence + if "." in sequence: + if not records: + raise ValueError( + "Found dot/period in first sequence of alignment" + ) + + ref = records[0].seq + new = [] + + for (letter, ref_letter) in zip(sequence, ref): + new.append(ref_letter if letter == "." else letter) + + sequence = "".join(new) + + records.append( + SeqRecord( + Seq(sequence), + id=line_split[1], + name=line_split[1], + description="", + annotations=anno, + ) + ) + elif line.startswith("i"): + # TODO: information about what is in the aligned species DNA before + # and after the immediately preceding "s" line + pass + elif line.startswith("e"): + # TODO: information about the size of the gap between the alignments + # that span the current block + pass + elif line.startswith("q"): + # TODO: quality of each aligned base for the species. + # Need to find documentation on this, looks like ASCII 0-9 or gap? + # Can then store in each SeqRecord's .letter_annotations dictionary, + # perhaps as the raw string or turned into integers / None for gap? + pass + elif line.startswith("#"): + # ignore comments + # (not sure whether comments + # are in the maf specification, though) + pass + elif not line.strip(): + # end a bundle of records + if seq_count is not None: + assert len(records) == seq_count + + alignment = MultipleSeqAlignment(records) + # TODO - Introduce an annotated alignment class? + # See also Bio/AlignIO/FastaIO.py for same requirement. + # For now, store the annotation a new private property: + alignment._annotations = annotations + + yield alignment + + in_a_bundle = False + + annotations = [] + records = [] + else: + raise ValueError( + "Error parsing alignment - unexpected line:\n%s" % (line,) + ) + elif line.startswith("a"): + # start a bundle of records + in_a_bundle = True + annot_strings = line.strip().split()[1:] + if len(annot_strings) != line.count("="): + raise ValueError("Error parsing alignment - invalid key in 'a' line") + annotations = dict(a_string.split("=") for a_string in annot_strings) + elif line.startswith("#"): + # ignore comments + pass + elif not line: + break + + +class MafIndex: + """Index for a MAF file. + + The index is a sqlite3 database that is built upon creation of the object + if necessary, and queried when methods *search* or *get_spliced* are + used. 
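+
+    A minimal usage sketch (the file names and target sequence name here are
+    hypothetical)::
+
+        from Bio.AlignIO.MafIO import MafIndex
+
+        idx = MafIndex("chr10.mafindex", "chr10.maf", "mm9.chr10")
+        for alignment in idx.search([3014742], [3014778]):
+            print(alignment)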
+ """ + + def __init__(self, sqlite_file, maf_file, target_seqname): + """Indexes or loads the index of a MAF file.""" + self._target_seqname = target_seqname + # example: Tests/MAF/ucsc_mm9_chr10.mafindex + self._index_filename = sqlite_file + # example: /home/bli/src/biopython/Tests/MAF + self._relative_path = os.path.abspath(os.path.dirname(sqlite_file)) + # example: Tests/MAF/ucsc_mm9_chr10.maf + self._maf_file = maf_file + + self._maf_fp = open(self._maf_file) + + # if sqlite_file exists, use the existing db, otherwise index the file + if os.path.isfile(sqlite_file): + self._con = dbapi2.connect(sqlite_file) + self._record_count = self.__check_existing_db() + else: + self._con = dbapi2.connect(sqlite_file) + self._record_count = self.__make_new_index() + + # lastly, setup a MafIterator pointing at the open maf_file + self._mafiter = MafIterator(self._maf_fp) + + def __check_existing_db(self): + """Perform basic sanity checks upon loading an existing index (PRIVATE).""" + try: + idx_version = int( + self._con.execute( + "SELECT value FROM meta_data WHERE key = 'version'" + ).fetchone()[0] + ) + if idx_version != MAFINDEX_VERSION: + msg = "\n".join( + [ + "Index version (%s) incompatible with this version " + "of MafIndex" % idx_version, + "You might erase the existing index %s " + "for it to be rebuilt." % self._index_filename, + ] + ) + raise ValueError(msg) + + filename = self._con.execute( + "SELECT value FROM meta_data WHERE key = 'filename'" + ).fetchone()[0] + # Compute absolute path of the original maf file + if os.path.isabs(filename): + # It was already stored as absolute + tmp_mafpath = filename + else: + # It should otherwise have been stored as relative to the index + # Would be stored with Unix / path separator, so convert + # it to the local OS path separator here: + tmp_mafpath = os.path.join( + self._relative_path, filename.replace("/", os.path.sep) + ) + if tmp_mafpath != os.path.abspath(self._maf_file): + # Original and given absolute paths differ. + raise ValueError( + "Index uses a different file (%s != %s)" + % (filename, self._maf_file) + ) + + db_target = self._con.execute( + "SELECT value FROM meta_data WHERE key = 'target_seqname'" + ).fetchone()[0] + if db_target != self._target_seqname: + raise ValueError( + "Provided database indexed for %s, expected %s" + % (db_target, self._target_seqname) + ) + + record_count = int( + self._con.execute( + "SELECT value FROM meta_data WHERE key = 'record_count'" + ).fetchone()[0] + ) + if record_count == -1: + raise ValueError("Unfinished/partial database provided") + + records_found = int( + self._con.execute("SELECT COUNT(*) FROM offset_data").fetchone()[0] + ) + if records_found != record_count: + raise ValueError( + "Expected %s records, found %s. Corrupt index?" 
+ % (record_count, records_found) + ) + + return records_found + + except (dbapi2.OperationalError, dbapi2.DatabaseError) as err: + raise ValueError("Problem with SQLite database: %s" % err) from None + + def __make_new_index(self): + """Read MAF file and generate SQLite index (PRIVATE).""" + # make the tables + self._con.execute("CREATE TABLE meta_data (key TEXT, value TEXT);") + self._con.execute( + "INSERT INTO meta_data (key, value) VALUES ('version', %s);" + % MAFINDEX_VERSION + ) + self._con.execute( + "INSERT INTO meta_data (key, value) VALUES ('record_count', -1);" + ) + self._con.execute( + "INSERT INTO meta_data (key, value) VALUES ('target_seqname', '%s');" + % (self._target_seqname,) + ) + # Determine whether to store maf file as relative to the index or absolute + # See https://github.com/biopython/biopython/pull/381 + if not os.path.isabs(self._maf_file) and not os.path.isabs( + self._index_filename + ): + # Since the user gave both maf file and index as relative paths, + # we will store the maf file relative to the index. + # Note for cross platform use (e.g. shared drive over SAMBA), + # convert any Windows slash into Unix style for rel paths. + # example: ucsc_mm9_chr10.maf + mafpath = os.path.relpath(self._maf_file, self._relative_path).replace( + os.path.sep, "/" + ) + elif ( + os.path.dirname(os.path.abspath(self._maf_file)) + os.path.sep + ).startswith(self._relative_path + os.path.sep): + # Since maf file is in same directory or sub directory, + # might as well make this into a relative path: + mafpath = os.path.relpath(self._maf_file, self._relative_path).replace( + os.path.sep, "/" + ) + else: + # Default to storing as an absolute path + # example: /home/bli/src/biopython/Tests/MAF/ucsc_mm9_chr10.maf + mafpath = os.path.abspath(self._maf_file) + self._con.execute( + "INSERT INTO meta_data (key, value) VALUES ('filename', '%s');" % (mafpath,) + ) + self._con.execute( + "CREATE TABLE offset_data (bin INTEGER, start INTEGER, end INTEGER, offset INTEGER);" + ) + + insert_count = 0 + + # iterate over the entire file and insert in batches + mafindex_func = self.__maf_indexer() + + while True: + batch = list(islice(mafindex_func, 100)) + if not batch: + break + + # batch is made from self.__maf_indexer(), + # which yields zero-based "inclusive" start and end coordinates + self._con.executemany( + "INSERT INTO offset_data (bin, start, end, offset) VALUES (?,?,?,?);", + batch, + ) + self._con.commit() + insert_count += len(batch) + + # then make indexes on the relevant fields + self._con.execute("CREATE INDEX IF NOT EXISTS bin_index ON offset_data(bin);") + self._con.execute( + "CREATE INDEX IF NOT EXISTS start_index ON offset_data(start);" + ) + self._con.execute("CREATE INDEX IF NOT EXISTS end_index ON offset_data(end);") + + self._con.execute( + "UPDATE meta_data SET value = '%s' WHERE key = 'record_count'" + % (insert_count,) + ) + + self._con.commit() + + return insert_count + + def __maf_indexer(self): + """Return index information for each bundle (PRIVATE). + + Yields index information for each bundle in the form of + (bin, start, end, offset) tuples where start and end are + 0-based inclusive coordinates. 
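+
+        For example (hypothetical values), a bundle whose target 's' line
+        reads ``s mm9.chr10 3014742 36 + ...`` would yield
+        ``(608, 3014742, 3014777, offset)``: the inclusive end is
+        ``3014742 + 36 - 1``, and 608 is the smallest UCSC bin holding the
+        region, i.e. ``585 + (3014742 >> 17)``.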
+ """ + line = self._maf_fp.readline() + + while line: + if line.startswith("a"): + # note the offset + offset = self._maf_fp.tell() - len(line) + + # search the following lines for a match to target_seqname + while True: + line = self._maf_fp.readline() + + if not line.strip() or line.startswith("a"): + # Empty line or new alignment record + raise ValueError( + "Target for indexing (%s) not found in this bundle" + % (self._target_seqname,) + ) + elif line.startswith("s"): + # s (literal), src (ID), start, size, strand, srcSize, text (sequence) + line_split = line.strip().split() + + if line_split[1] == self._target_seqname: + start = int(line_split[2]) + size = int(line_split[3]) + if size != len(line_split[6].replace("-", "")): + raise ValueError( + "Invalid length for target coordinates " + "(expected %s, found %s)" + % (size, len(line_split[6].replace("-", ""))) + ) + + # "inclusive" end position is start + length - 1 + end = start + size - 1 + + # _ucscbin takes end-exclusive coordinates + yield (self._ucscbin(start, end + 1), start, end, offset) + + break + + line = self._maf_fp.readline() + + # TODO: check coordinate correctness for the two bin-related static methods + @staticmethod + def _region2bin(start, end): + """Find bins that a region may belong to (PRIVATE). + + Converts a region to a list of bins that it may belong to, including largest + and smallest bins. + """ + bins = [0, 1] + + bins.extend(range(1 + (start >> 26), 2 + ((end - 1) >> 26))) + bins.extend(range(9 + (start >> 23), 10 + ((end - 1) >> 23))) + bins.extend(range(73 + (start >> 20), 74 + ((end - 1) >> 20))) + bins.extend(range(585 + (start >> 17), 586 + ((end - 1) >> 17))) + + return set(bins) + + @staticmethod + def _ucscbin(start, end): + """Return the smallest bin a given region will fit into (PRIVATE). + + Adapted from http://genomewiki.ucsc.edu/index.php/Bin_indexing_system + """ + bin_offsets = [512 + 64 + 8 + 1, 64 + 8 + 1, 8 + 1, 1, 0] + + _bin_first_shift = 17 + _bin_next_shift = 3 + + start_bin = start + end_bin = end - 1 + + start_bin >>= _bin_first_shift + end_bin >>= _bin_first_shift + + for bin_offset in bin_offsets: + if start_bin == end_bin: + return bin_offset + start_bin + start_bin >>= _bin_next_shift + end_bin >>= _bin_next_shift + + return 0 + + def _get_record(self, offset): + """Retrieve a single MAF record located at the offset provided (PRIVATE).""" + self._maf_fp.seek(offset) + return next(self._mafiter) + + def search(self, starts, ends): + """Search index database for MAF records overlapping ranges provided. + + Returns *MultipleSeqAlignment* results in order by start, then end, then + internal offset field. + + *starts* should be a list of 0-based start coordinates of segments in the reference. + *ends* should be the list of the corresponding segment ends + (in the half-open UCSC convention: + http://genome.ucsc.edu/blog/the-ucsc-genome-browser-coordinate-counting-systems/). + """ + # verify the provided exon coordinates + if len(starts) != len(ends): + raise ValueError("Every position in starts must have a match in ends") + + # Could it be safer to sort the (exonstart, exonend) pairs? 
+        for exonstart, exonend in zip(starts, ends):
+            exonlen = exonend - exonstart
+            if exonlen < 1:
+                raise ValueError(
+                    "Exon coordinates (%d, %d) invalid: exon length (%d) < 1"
+                    % (exonstart, exonend, exonlen)
+                )
+        con = self._con
+
+        # Keep track of what blocks have already been yielded
+        # in order to avoid duplicating them
+        # (see https://github.com/biopython/biopython/issues/1083)
+        yielded_rec_coords = set()
+        # search for every exon
+        for exonstart, exonend in zip(starts, ends):
+            try:
+                possible_bins = ", ".join(
+                    map(str, self._region2bin(exonstart, exonend))
+                )
+            except TypeError:
+                raise TypeError(
+                    "Exon coordinates must be integers "
+                    "(start=%d, end=%d)" % (exonstart, exonend)
+                ) from None
+
+            # https://www.sqlite.org/lang_expr.html
+            # -----
+            # The BETWEEN operator
+            #
+            # The BETWEEN operator is logically equivalent to a pair of
+            # comparisons. "x BETWEEN y AND z" is equivalent to "x>=y AND x<=z"
+            # except that with BETWEEN, the x expression is only evaluated
+            # once. The precedence of the BETWEEN operator is the same as the
+            # precedence as operators == and != and LIKE and groups left to
+            # right.
+            # -----
+
+            # We are testing overlap between the query segment and records in
+            # the index, using non-strict coordinate comparisons.
+            # The query segment end must be passed as end-inclusive.
+            # The index should also have been built with end-inclusive
+            # end coordinates.
+            # See https://github.com/biopython/biopython/pull/1086#issuecomment-285069073
+
+            result = con.execute(
+                "SELECT DISTINCT start, end, offset FROM offset_data "
+                "WHERE bin IN (%s) "
+                "AND (end BETWEEN %s AND %s OR %s BETWEEN start AND end) "
+                "ORDER BY start, end, offset ASC;"
+                % (possible_bins, exonstart, exonend - 1, exonend - 1)
+            )
+
+            rows = result.fetchall()
+
+            # rows come from the sqlite index,
+            # which should have been written using __make_new_index,
+            # so rec_start and rec_end should be zero-based "inclusive" coordinates
+            for rec_start, rec_end, offset in rows:
+                # Avoid yielding the same block multiple times
+                if (rec_start, rec_end) in yielded_rec_coords:
+                    continue
+                else:
+                    yielded_rec_coords.add((rec_start, rec_end))
+                # Iterate through hits, fetching alignments from the MAF file
+                # and checking to be sure we've retrieved the expected record.
+
+                fetched = self._get_record(int(offset))
+
+                for record in fetched:
+                    if record.id == self._target_seqname:
+                        # start and size come from the maf lines
+                        start = record.annotations["start"]
+                        # "inclusive" end is start + length - 1
+                        end = start + record.annotations["size"] - 1
+
+                        if not (start == rec_start and end == rec_end):
+                            raise ValueError(
+                                "Expected %s-%s @ offset %s, found %s-%s"
+                                % (rec_start, rec_end, offset, start, end)
+                            )
+
+                yield fetched
+
+    def get_spliced(self, starts, ends, strand=1):
+        """Return a multiple alignment of the exact sequence range provided.
+
+        Accepts two lists of start and end positions on target_seqname, representing
+        exons to be spliced in silico. Returns a *MultipleSeqAlignment* of the
+        desired sequences spliced together.
+
+        *starts* should be a list of 0-based start coordinates of segments in the reference.
+        *ends* should be the list of the corresponding segment ends
+        (in the half-open UCSC convention:
+        http://genome.ucsc.edu/blog/the-ucsc-genome-browser-coordinate-counting-systems/).
+
+        To ask for the alignment portion corresponding to the first 100
+        nucleotides of the reference sequence, you would use
+        ``get_spliced([0], [100])``
+        """
+        # validate strand
+        if strand not in (1, -1):
+            raise ValueError("Strand must be 1 or -1, got %s" % strand)
+
+        # pull all alignments that span the desired intervals
+        fetched = list(self.search(starts, ends))
+
+        # keep track of the expected letter count
+        # (sum of the lengths of the half-open [start, end) segments)
+        expected_letters = sum(end - start for start, end in zip(starts, ends))
+
+        # if there's no alignment, return a filler alignment of the expected length
+        if len(fetched) == 0:
+            return MultipleSeqAlignment(
+                [SeqRecord(Seq("N" * expected_letters), id=self._target_seqname)]
+            )
+
+        # find the union of all IDs in these alignments
+        all_seqnames = {sequence.id for multiseq in fetched for sequence in multiseq}
+
+        # split every record by base position
+        # key: sequence name
+        # value: dictionary
+        #        key: position in the reference sequence
+        #        value: letter(s) (including letters
+        #               aligned to the "-" preceding the letter
+        #               at the position in the reference, if any)
+        split_by_position = {seq_name: {} for seq_name in all_seqnames}
+
+        # keep track of what the total number of (unspliced) letters should be
+        total_rec_length = 0
+
+        # track first strand encountered on the target seqname
+        ref_first_strand = None
+
+        for multiseq in fetched:
+            # find the target_seqname in this MultipleSeqAlignment and use it to
+            # set the parameters for the rest of this iteration
+            for seqrec in multiseq:
+                if seqrec.id == self._target_seqname:
+                    try:
+                        if ref_first_strand is None:
+                            ref_first_strand = seqrec.annotations["strand"]
+
+                            if ref_first_strand not in (1, -1):
+                                raise ValueError("Strand must be 1 or -1")
+                        elif ref_first_strand != seqrec.annotations["strand"]:
+                            raise ValueError(
+                                "Encountered strand='%s' on target seqname, "
+                                "expected '%s'"
+                                % (seqrec.annotations["strand"], ref_first_strand)
+                            )
+                    except KeyError:
+                        raise ValueError(
+                            "No strand information for target seqname (%s)"
+                            % self._target_seqname
+                        ) from None
+                    # length including gaps (i.e.
alignment length) + rec_length = len(seqrec) + rec_start = seqrec.annotations["start"] + ungapped_length = seqrec.annotations["size"] + # inclusive end in zero-based coordinates of the reference + rec_end = rec_start + ungapped_length - 1 + # This is length in terms of actual letters in the reference + total_rec_length += ungapped_length + + # blank out these positions for every seqname + for seqrec in multiseq: + for pos in range(rec_start, rec_end + 1): + split_by_position[seqrec.id][pos] = "" + + break + # http://psung.blogspot.fr/2007/12/for-else-in-python.html + # https://docs.python.org/2/tutorial/controlflow.html#break-and-continue-statements-and-else-clauses-on-loops + else: + raise ValueError( + "Did not find %s in alignment bundle" % (self._target_seqname,) + ) + + # the true, chromosome/contig/etc position in the target seqname + real_pos = rec_start + + # loop over the alignment to fill split_by_position + for gapped_pos in range(0, rec_length): + for seqrec in multiseq: + # keep track of this position's value for the target seqname + if seqrec.id == self._target_seqname: + track_val = seqrec.seq[gapped_pos] + + # Here, a real_pos that corresponds to just after a series of "-" + # in the reference will "accumulate" the letters found in other sequences + # in front of the "-"s + split_by_position[seqrec.id][real_pos] += seqrec.seq[gapped_pos] + + # increment the real_pos counter only when non-gaps are found in + # the target_seqname, and we haven't reached the end of the record + if track_val != "-" and real_pos < rec_end: + real_pos += 1 + + # make sure the number of bp entries equals the sum of the record lengths + if len(split_by_position[self._target_seqname]) != total_rec_length: + raise ValueError( + "Target seqname (%s) has %s records, expected %s" + % ( + self._target_seqname, + len(split_by_position[self._target_seqname]), + total_rec_length, + ) + ) + + # translates a position in the target_seqname sequence to its gapped length + realpos_to_len = { + pos: len(gapped_fragment) + for pos, gapped_fragment in split_by_position[self._target_seqname].items() + if len(gapped_fragment) > 1 + } + + # splice together the exons + subseq = {} + + for seqid in all_seqnames: + seq_split = split_by_position[seqid] + seq_splice = [] + + filler_char = "N" if seqid == self._target_seqname else "-" + + # iterate from start to end, taking bases from split_by_position when + # they exist, using N or - for gaps when there is no alignment. 
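+            # For example (hypothetical positions): if two alignment columns
+            # that are gaps in the reference immediately precede the reference
+            # letter at position 100, split_by_position maps 100 to a
+            # three-letter string for every sequence present in that block,
+            # realpos_to_len[100] == 3, and a sequence absent at that position
+            # contributes three filler characters instead of one.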
+ append = seq_splice.append + + for exonstart, exonend in zip(starts, ends): + # exonend is exclusive + for real_pos in range(exonstart, exonend): + # if this seqname has this position, add it + if real_pos in seq_split: + append(seq_split[real_pos]) + # if not, but it's in the target_seqname, add length-matched filler + elif real_pos in realpos_to_len: + append(filler_char * realpos_to_len[real_pos]) + # it's not in either, so add a single filler character + else: + append(filler_char) + + subseq[seqid] = "".join(seq_splice) + + # make sure we're returning the right number of letters + if len(subseq[self._target_seqname].replace("-", "")) != expected_letters: + raise ValueError( + "Returning %s letters for target seqname (%s), expected %s" + % ( + len(subseq[self._target_seqname].replace("-", "")), + self._target_seqname, + expected_letters, + ) + ) + + # check to make sure all sequences are the same length as the target seqname + ref_subseq_len = len(subseq[self._target_seqname]) + + for seqid, seq in subseq.items(): + if len(seq) != ref_subseq_len: + raise ValueError( + "Returning length %s for %s, expected %s" + % (len(seq), seqid, ref_subseq_len) + ) + + # finally, build a MultipleSeqAlignment object for our final sequences + result_multiseq = [] + + for seqid, seq in subseq.items(): + seq = Seq(seq) + + seq = seq if strand == ref_first_strand else seq.reverse_complement() + + result_multiseq.append(SeqRecord(seq, id=seqid, name=seqid, description="")) + + return MultipleSeqAlignment(result_multiseq) + + def __repr__(self): + """Return a string representation of the index.""" + return "MafIO.MafIndex(%r, target_seqname=%r)" % ( + self._maf_fp.name, + self._target_seqname, + ) + + def __len__(self): + """Return the number of records in the index.""" + return self._record_count diff --git a/code/lib/Bio/AlignIO/MauveIO.py b/code/lib/Bio/AlignIO/MauveIO.py new file mode 100644 index 0000000..b5f597c --- /dev/null +++ b/code/lib/Bio/AlignIO/MauveIO.py @@ -0,0 +1,349 @@ +# Copyright 2015-2015 by Eric Rasche. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +"""Bio.AlignIO support for "xmfa" output from Mauve/ProgressiveMauve. + +You are expected to use this module via the Bio.AlignIO functions (or the +Bio.SeqIO functions if you want to work directly with the gapped sequences). 
+ +For example, consider a progressiveMauve alignment file containing the following:: + + #FormatVersion Mauve1 + #Sequence1File a.fa + #Sequence1Entry 1 + #Sequence1Format FastA + #Sequence2File b.fa + #Sequence2Entry 2 + #Sequence2Format FastA + #Sequence3File c.fa + #Sequence3Entry 3 + #Sequence3Format FastA + #BackboneFile three.xmfa.bbcols + > 1:0-0 + a.fa + -------------------------------------------------------------------------------- + -------------------------------------------------------------------------------- + -------------------------------------------------------------------------------- + > 2:5417-5968 + b.fa + TTTAAACATCCCTCGGCCCGTCGCCCTTTTATAATAGCAGTACGTGAGAGGAGCGCCCTAAGCTTTGGGAAATTCAAGC- + -------------------------------------------------------------------------------- + CTGGAACGTACTTGCTGGTTTCGCTACTATTTCAAACAAGTTAGAGGCCGTTACCTCGGGCGAACGTATAAACCATTCTG + > 3:9476-10076 - c.fa + TTTAAACACCTTTTTGGATG--GCCCAGTTCGTTCAGTTGTG-GGGAGGAGATCGCCCCAAACGTATGGTGAGTCGGGCG + TTTCCTATAGCTATAGGACCAATCCACTTACCATACGCCCGGCGTCGCCCAGTCCGGTTCGGTACCCTCCATGACCCACG + ---------------------------------------------------------AAATGAGGGCCCAGGGTATGCTT + = + > 2:5969-6015 + b.fa + ----------------------- + GGGCGAACGTATAAACCATTCTG + > 3:9429-9476 - c.fa + TTCGGTACCCTCCATGACCCACG + AAATGAGGGCCCAGGGTATGCTT + +This is a multiple sequence alignment with multiple aligned sections, so you +would probably load this using the Bio.AlignIO.parse() function: + + >>> from Bio import AlignIO + >>> align = AlignIO.parse("Mauve/simple_short.xmfa", "mauve") + >>> alignments = list(align) + >>> for aln in alignments: + ... print(aln) + ... + Alignment with 3 rows and 240 columns + --------------------------------------------...--- a.fa + TTTAAACATCCCTCGGCCCGTCGCCCTTTTATAATAGCAGTACG...CTG b.fa/5416-5968 + TTTAAACACCTTTTTGGATG--GCCCAGTTCGTTCAGTTGTG-G...CTT c.fa/9475-10076 + Alignment with 2 rows and 46 columns + -----------------------GGGCGAACGTATAAACCATTCTG b.fa/5968-6015 + TTCGGTACCCTCCATGACCCACGAAATGAGGGCCCAGGGTATGCTT c.fa/9428-9476 + +Additional information is extracted from the XMFA file and available through +the annotation attribute of each record:: + + >>> for record in alignments[0]: + ... print(record.id, len(record)) + ... print(" start: %d, end: %d, strand: %d" %( + ... record.annotations['start'], record.annotations['end'], + ... record.annotations['strand'])) + ... 
+    a.fa 240
+     start: 0, end: 0, strand: 1
+    b.fa/5416-5968 240
+     start: 5416, end: 5968, strand: 1
+    c.fa/9475-10076 240
+     start: 9475, end: 10076, strand: -1
+
+"""
+import re
+
+from Bio.Align import MultipleSeqAlignment
+from Bio.Seq import Seq
+from Bio.SeqRecord import SeqRecord
+
+from .Interfaces import AlignmentIterator
+from .Interfaces import SequentialAlignmentWriter
+
+
+XMFA_HEADER_REGEX = re.compile(
+    r"> (?P<id>\d+):(?P<start>\d+)-(?P<end>\d+) (?P<strand>[+-]) (?P<name>.*)"
+)
+XMFA_HEADER_REGEX_BIOPYTHON = re.compile(
+    r"> (?P<id>\d+):(?P<start>\d+)-(?P<end>\d+) (?P<strand>[+-]) (?P<name>[^#]*) # (?P<realname>.*)"
+)
+ID_LINE_FMT = "> {seq_name}:{start}-{end} {strand} {filename} # {ugly_hack}"
+
+
+def _identifier_split(identifier):
+    """Return (name, start, end) string tuple from an identifier (PRIVATE)."""
+    id, loc, strand = identifier.split(":")
+    start, end = map(int, loc.split("-"))
+    start -= 1
+    return id, start, end, strand
+
+
+class MauveWriter(SequentialAlignmentWriter):
+    """Mauve/XMFA alignment writer."""
+
+    def __init__(self, *args, **kwargs):
+        """Initialize the class."""
+        super().__init__(*args, **kwargs)
+        self._wrote_header = False
+        self._wrote_first = False
+
+    def write_alignment(self, alignment):
+        """Use this to write (another) single alignment to an open file.
+
+        Note that sequences and their annotation are recorded
+        together (rather than having a block of annotation followed
+        by a block of aligned sequences).
+        """
+        count = len(alignment)
+
+        self._length_of_sequences = alignment.get_alignment_length()
+
+        # NOTE - For now, the alignment object does not hold any per column
+        # or per alignment annotation - only per sequence.
+
+        if count == 0:
+            raise ValueError("Must have at least one sequence")
+        if self._length_of_sequences == 0:
+            raise ValueError("Non-empty sequences are required")
+
+        if not self._wrote_header:
+            self._wrote_header = True
+            self.handle.write("#FormatVersion Mauve1\n")
+            # There are some more headers, but we ignore those for now.
+            # Sequence1File unknown.fa
+            # Sequence1Entry 1
+            # Sequence1Format FastA
+            for i in range(1, count + 1):
+                self.handle.write("#Sequence%sEntry\t%s\n" % (i, i))
+
+        for idx, record in enumerate(alignment):
+            self._write_record(record, record_idx=idx)
+        self.handle.write("=\n")
+
+    def _write_record(self, record, record_idx=0):
+        """Write a single SeqRecord to the file (PRIVATE)."""
+        if self._length_of_sequences != len(record.seq):
+            raise ValueError("Sequences must all be the same length")
+
+        seq_name = record.name
+        try:
+            seq_name = str(int(record.name))
+        except ValueError:
+            seq_name = str(record_idx + 1)
+
+        # We remove the "/{start}-{end}" before writing, as it cannot be part
+        # of the produced XMFA file.
+ if "start" in record.annotations and "end" in record.annotations: + suffix0 = "/%s-%s" % ( + record.annotations["start"], + record.annotations["end"], + ) + suffix1 = "/%s-%s" % ( + record.annotations["start"] + 1, + record.annotations["end"], + ) + if seq_name[-len(suffix0) :] == suffix0: + seq_name = seq_name[: -len(suffix0)] + if seq_name[-len(suffix1) :] == suffix1: + seq_name = seq_name[: -len(suffix1)] + + if ( + "start" in record.annotations + and "end" in record.annotations + and "strand" in record.annotations + ): + id_line = ID_LINE_FMT.format( + seq_name=seq_name, + start=record.annotations["start"] + 1, + end=record.annotations["end"], + strand=("+" if record.annotations["strand"] == 1 else "-"), + filename=record.name + ".fa", + ugly_hack=record.id, + ) + lacking_annotations = False + else: + id_line = ID_LINE_FMT.format( + seq_name=seq_name, + start=0, + end=0, + strand="+", + filename=record.name + ".fa", + ugly_hack=record.id, + ) + lacking_annotations = True + + # If the sequence is an empty one, skip writing it out + if (":0-0 " in id_line or ":1-0 " in id_line) and not lacking_annotations: + # Except in the first LCB + if not self._wrote_first: + self._wrote_first = True + # The first LCB we write out is special, and must list ALL + # sequences, for the Mauve GUI + # http://darlinglab.org/mauve/user-guide/files.html#non-standard-xmfa-formatting-used-by-the-mauve-gui + id_line = ID_LINE_FMT.format( + seq_name=seq_name, + start=0, + end=0, + strand="+", + filename=record.name + ".fa", + ugly_hack=record.id, + ) + id_line = id_line.replace("\n", " ").replace("\r", " ") + self.handle.write(id_line + "\n\n") + # Alignments lacking a start/stop/strand were generated by + # Biopython on load, and shouldn't exist according to XMFA + else: + # In other blocks, we only write sequences if they exist in a given + # alignment. + id_line = id_line.replace("\n", " ").replace("\r", " ") + self.handle.write(id_line + "\n") + for i in range(0, len(record.seq), 80): + self.handle.write("%s\n" % record.seq[i : i + 80]) + + +class MauveIterator(AlignmentIterator): + """Mauve xmfa alignment iterator.""" + + _ids = [] # for caching IDs between __next__ calls + + def __next__(self): + """Parse the next alignment from the handle.""" + handle = self.handle + line = handle.readline() + + if not line: + raise StopIteration + + # Strip out header comments + while line and line.strip().startswith("#"): + line = handle.readline() + + seqs = {} + seq_regions = {} + passed_end_alignment = False + + latest_id = None + while True: + if not line: + break # end of file + line = line.strip() + + if line.startswith("="): + # There may be more data, but we've reached the end of this + # alignment + break + elif line.startswith(">"): + m = XMFA_HEADER_REGEX_BIOPYTHON.match(line) + if not m: + m = XMFA_HEADER_REGEX.match(line) + if not m: + raise ValueError("Malformed header line: %s", line) + + parsed_id = m.group("id") + parsed_data = {} + for key in ("start", "end", "id", "strand", "name", "realname"): + try: + value = m.group(key) + if key == "start": + value = int(value) + # Convert to zero based counting + if value > 0: + value -= 1 + + if key == "end": + value = int(value) + parsed_data[key] = value + except IndexError: + # This will occur if we're asking for a group that + # doesn't exist. It's fine. 
+                        pass
+                seq_regions[parsed_id] = parsed_data
+
+                if parsed_id not in self._ids:
+                    self._ids.append(parsed_id)
+
+                seqs.setdefault(parsed_id, "")
+                latest_id = parsed_id
+            else:
+                assert not passed_end_alignment
+                if latest_id is None:
+                    raise ValueError("Saw sequence before definition line")
+                seqs[latest_id] += line
+            line = handle.readline()
+
+        assert len(seqs) <= len(self._ids)
+
+        self.ids = self._ids
+        self.sequences = seqs
+
+        if self._ids and seqs:
+            alignment_length = max(map(len, list(seqs.values())))
+            records = []
+            for id in self._ids:
+                if id not in seqs or len(seqs[id]) == 0:
+                    seq = "-" * alignment_length
+                else:
+                    seq = seqs[id]
+
+                if alignment_length != len(seq):
+                    raise ValueError(
+                        "Sequences have different lengths, or repeated identifier"
+                    )
+
+                # Sometimes we don't see a particular sequence in the
+                # alignment, so we skip that record since it isn't present in
+                # that LCB/alignment
+                if id not in seq_regions:
+                    continue
+
+                if seq_regions[id]["start"] != 0 or seq_regions[id]["end"] != 0:
+                    suffix = "/{start}-{end}".format(**seq_regions[id])
+                    if "realname" in seq_regions[id]:
+                        corrected_id = seq_regions[id]["realname"]
+                    else:
+                        corrected_id = seq_regions[id]["name"]
+                    if corrected_id.count(suffix) == 0:
+                        corrected_id += suffix
+                else:
+                    if "realname" in seq_regions[id]:
+                        corrected_id = seq_regions[id]["realname"]
+                    else:
+                        corrected_id = seq_regions[id]["name"]
+
+                record = SeqRecord(Seq(seq), id=corrected_id, name=id)
+
+                record.annotations["start"] = seq_regions[id]["start"]
+                record.annotations["end"] = seq_regions[id]["end"]
+                record.annotations["strand"] = (
+                    1 if seq_regions[id]["strand"] == "+" else -1
+                )
+
+                records.append(record)
+            return MultipleSeqAlignment(records)
+        else:
+            raise StopIteration
diff --git a/code/lib/Bio/AlignIO/MsfIO.py b/code/lib/Bio/AlignIO/MsfIO.py
new file mode 100644
index 0000000..d620f1b
--- /dev/null
+++ b/code/lib/Bio/AlignIO/MsfIO.py
@@ -0,0 +1,331 @@
+# Copyright 2019, National Marrow Donor Program (NMDP). All rights reserved.
+# Written by Peter Cock, The James Hutton Institute, under contract to NMDP.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Bio.AlignIO support for GCG MSF format.
+
+The file format was produced by the GCG PileUp and LocalPileUp tools,
+and later tools such as T-COFFEE and MUSCLE support it as an optional
+output format.
+
+The original GCG tool would write gaps at the ends of each sequence (which
+could be missing data) as tildes (``~``), whereas internal gaps were periods
+(``.``). This parser replaces both with minus signs (``-``) for consistency
+with the rest of ``Bio.AlignIO``.
+
+You are expected to use this module via the Bio.AlignIO functions (or the
+Bio.SeqIO functions if you want to work directly with the gapped sequences).
+"""
+from Bio.Align import MultipleSeqAlignment
+from Bio.Seq import Seq
+from Bio.SeqRecord import SeqRecord
+
+from .Interfaces import AlignmentIterator
+
+
+class MsfIterator(AlignmentIterator):
+    """GCG MSF alignment iterator."""
+
+    _header = None  # for caching lines between __next__ calls
+
+    def __next__(self):
+        """Parse the next alignment from the handle."""
+        handle = self.handle
+
+        if self._header is None:
+            line = handle.readline()
+        else:
+            # Header we saved from when we were parsing
+            # the previous alignment.
+            line = self._header
+            self._header = None
+
+        if not line:
+            raise StopIteration
+
+        # Whitelisted headers we know about.
+        known_headers = ["!!NA_MULTIPLE_ALIGNMENT", "!!AA_MULTIPLE_ALIGNMENT", "PileUp"]
+        # Examples in "Molecular Biology Software Training Manual GCG version 10"
+        # by BBSRC Biosciences IT Services (BITS), Harpenden, UK, Copyright 1996-2001
+        # would often start as follows:
+        #
+        # !!AA_MULTIPLE_ALIGNMENT 1.0
+        # PileUp of: @/usr/users2/culhane/...
+        #
+        # etc with other seemingly free format text before getting to the
+        # MSF/Type/Check line and the following Name: lines block and // line.
+        #
+        # MUSCLE just has a line "PileUp", while other sources just use the line
+        # "!!AA_MULTIPLE_ALIGNMENT" (amino acid) or "!!NA_MULTIPLE_ALIGNMENT"
+        # (nucleotide).
+        if line.strip().split()[0] not in known_headers:
+            raise ValueError(
+                "%s is not a known GCG MSF header: %s"
+                % (line.strip().split()[0], ", ".join(known_headers))
+            )
+
+        while line and " MSF: " not in line:
+            line = handle.readline()
+
+        if not line:
+            raise ValueError("Reached end of file without MSF/Type/Check header line")
+
+        # Quoting from "Molecular Biology Software Training Manual GCG version 10"
+        # by BBSRC Biosciences IT Services (BITS), Harpenden, UK. Copyright 1996-2001.
+        # Page 31:
+        #
+        # "Header information is before a .. (double dot) in a GCG format file.
+        #  The file will also have a checksum specific for that file."
+        #
+        # This was followed by a single non-aligned sequence, but this convention
+        # appears to also be used in the GCG MSF files. Quoting other examples in
+        # this reference, page 31:
+        #
+        # localpileup_17.msf  MSF: 195  Type: P  January 6, 2000 15:41  Check: 4365 ..
+        #
+        # Except from page 148:
+        #
+        # localpileup_106.msf  MSF: 457  Type: P  November 28, 2000 16:09  Check: 2396 ..
+        #
+        # Quoting output from MUSCLE v3.8, have two leading spaces and a zero checksum:
+        #
+        #   MSF: 689  Type: N  Check: 0000  ..
+        #
+        # By observation, the MSF value is the column count, type is N (nucleotide)
+        # or P (protein / amino acid).
+        #
+        # In a possible bug, EMBOSS v6.6.0.0 uses CompCheck: rather than Check: as shown,
+        #
+        # $ seqret -sequence Tests/Fasta/f002 -auto -stdout -osformat msf
+        # !!NA_MULTIPLE_ALIGNMENT 1.0
+        #
+        #   stdout MSF: 633 Type: N 01/08/19 CompCheck: 8543 ..
+        #
+        #   Name: G26680 Len: 633  Check: 4334 Weight: 1.00
+        #   Name: G26685 Len: 633  Check: 3818 Weight: 1.00
+        #   Name: G29385 Len: 633  Check: 391 Weight: 1.00
+        #
+        # //
+        #
+        parts = line.strip("\n").split()
+        offset = parts.index("MSF:")
+        if (
+            parts[offset + 2] != "Type:"
+            or parts[-3] not in ("Check:", "CompCheck:")
+            or parts[-1] != ".."
+        ):
+            raise ValueError(
+                "GCG MSF header line should be "
+                "'<optional text> MSF: <int> Type: <letter> <optional date> Check: <int> ..', "
+                " not: %r" % line
+            )
+        try:
+            aln_length = int(parts[offset + 1])
+        except ValueError:
+            aln_length = -1
+        if aln_length < 0:
+            raise ValueError(
+                "GCG MSF header line should have MSF: <int> for column count, not %r"
+                % parts[offset + 1]
+            )
+        seq_type = parts[offset + 3]
+        if seq_type not in ["P", "N"]:
+            raise ValueError(
+                "GCG MSF header line should have 'Type: P' (protein) "
+                "or 'Type: N' (nucleotide), not 'Type: %s'" % seq_type
+            )
+
+        # There should be a blank line after that header line, then the Name: lines
+        #
+        # In a possible bug, T-COFFEE v12.00 adds 'oo' after the names, as shown here,
+        #
+        # PileUp
+        #
+        #
+        #
+        #    MSF:  628  Type: P    Check:   147   ..
+        #
+        #  Name: AK1H_ECOLI/1-378 oo  Len:  628  Check:  3643  Weight:  1.000
+        #  Name: AKH_HAEIN/1-382 oo  Len:  628  Check:  6504  Weight:  1.000
+        #
+        # //
+        ids = []
+        lengths = []
+        checks = []
+        weights = []
+        line = handle.readline()
+        while line and line.strip() != "//":
+            line = handle.readline()
+            if line.strip().startswith("Name: "):
+                if " Len: " in line and " Check: " in line and " Weight: " in line:
+                    rest = line[line.index("Name: ") + 6 :].strip()
+                    name, rest = rest.split(" Len: ")
+                    length, rest = rest.split(" Check: ")
+                    check, weight = rest.split(" Weight: ")
+                    name = name.strip()
+                    if name.endswith(" oo"):
+                        # T-COFFEE oddity, ignore this
+                        name = name[:-3]
+                    if name in ids:
+                        raise ValueError("Duplicated ID of %r" % name)
+                    if " " in name:
+                        raise NotImplementedError("Space in ID %r" % name)
+                    ids.append(name)
+                    # Expect aln_length <= int(length.strip()), see below
+                    lengths.append(int(length.strip()))
+                    checks.append(int(check.strip()))
+                    weights.append(float(weight.strip()))
+                else:
+                    raise ValueError("Malformed GCG MSF name line: %r" % line)
+        if not line:
+            raise ValueError("End of file while looking for end of header // line.")
+
+        if aln_length != max(lengths):
+            # In broken examples from IMGTHLA it was possible to continue
+            # https://github.com/ANHIG/IMGTHLA/issues/201
+            max_length = max(lengths)
+            max_count = sum(1 for _ in lengths if _ == max_length)
+            raise ValueError(
+                "GCG MSF header said alignment length %i, but %s of %i sequences said Len: %s"
+                % (aln_length, max_count, len(ids), max_length)
+            )
+
+        line = handle.readline()
+        if not line:
+            raise ValueError("End of file after // line, expected sequences.")
+        if line.strip():
+            raise ValueError("After // line, expected blank line before sequences.")
+
+        # Now load the sequences
+        seqs = [[] for _ in ids]  # list of empty lists
+        completed_length = 0
+        while completed_length < aln_length:
+            # Note might have a coordinate header line (seems to be optional)
+            for idx, name in enumerate(ids):
+                line = handle.readline()
+                if idx == 0 and not line.strip():
+                    # T-COFFEE uses two blank lines between blocks, rather than one
+                    while line and not line.strip():
+                        line = handle.readline()
+                if not line:
+                    raise ValueError("End of file where expecting sequence data.")
+                # print("Looking for seq for %s in line: %r" % (name, line))
+                words = line.strip().split()
+                # Should we use column numbers, rather than assuming no spaces in names?
+                if idx == 0 and words and words[0] != name:
+                    # print("Actually have a coord line")
+                    # Hopefully this is a coordinate header before the first seq
+                    try:
+                        i = int(words[0])
+                    except ValueError:
+                        i = -1
+                    if i != completed_length + 1:
+                        raise ValueError(
+                            "Expected GCG MSF coordinate line starting %i, got: %r"
+                            % (completed_length + 1, line)
+                        )
+                    if len(words) > 1:
+                        # Final block usually not full 50 chars, so expect start only.
+                        if len(words) != 2:
+                            i = -1
+                        else:
+                            try:
+                                i = int(words[1])
+                            except ValueError:
+                                i = -1
+                        if i != (
+                            completed_length + 50
+                            if completed_length + 50 < aln_length
+                            else aln_length
+                        ):
+                            raise ValueError(
+                                "Expected GCG MSF coordinate line %i to %i, got: %r"
+                                % (
+                                    completed_length + 1,
+                                    completed_length + 50
+                                    if completed_length + 50 < aln_length
+                                    else aln_length,
+                                    line,
+                                )
+                            )
+                    line = handle.readline()
+                    words = line.strip().split()
+                    # print("Still looking for seq for %s in line: %r" % (name, line))
+                # Dealt with any coordinate header line, should now be sequence
+                if not words:
+                    # Should be sequence here, but perhaps it's a short one?
+                    if (
+                        lengths[idx] < aln_length
+                        and len("".join(seqs[idx])) == lengths[idx]
+                    ):
+                        # Is this actually allowed in the format? Personally I would
+                        # expect a line with name and a block of trailing ~ here.
+                        pass
+                    else:
+                        raise ValueError(
+                            "Expected sequence for %s, got: %r" % (name, line)
+                        )
+                elif words[0] == name:
+                    assert len(words) > 1, line
+                    # print(i, name, repr(words))
+                    seqs[idx].extend(words[1:])
+                else:
+                    raise ValueError("Expected sequence for %r, got: %r" % (name, line))
+                # TODO - check the sequence lengths thus far are consistent
+                # with blocks of 50?
+            completed_length += 50
+            line = handle.readline()
+            if line.strip():
+                raise ValueError("Expected blank line, got: %r" % line)
+
+        # Skip over any whitespace at the end...
+        while True:
+            line = handle.readline()
+            if not line:
+                # End of file, no more alignments
+                break
+            elif not line.strip():
+                # Blank line, ignore
+                pass
+            elif line.strip().split()[0] in known_headers:
+                # Looks like the start of another alignment:
+                self._header = line
+                break
+            else:
+                raise ValueError("Unexpected line after GCG MSF alignment: %r" % line)
+
+        # Combine list of strings into single string, remap gaps
+        seqs = ["".join(s).replace("~", "-").replace(".", "-") for s in seqs]
+
+        # Apply any trailing padding for short sequences
+        padded = False
+        for idx, (length, s) in enumerate(zip(lengths, seqs)):
+            if len(s) < aln_length and len(s) == length:
+                padded = True
+                seqs[idx] = s + "-" * (aln_length - len(s))
+        if padded:
+            import warnings
+            from Bio import BiopythonParserWarning
+
+            warnings.warn(
+                "One or more alignment sequences were truncated and have been gap padded",
+                BiopythonParserWarning,
+            )
+
+        records = (
+            SeqRecord(Seq(s), id=i, name=i, description=i, annotations={"weight": w},)
+            for (i, s, w) in zip(ids, seqs, weights)
+        )
+
+        # This will check alignment lengths are self-consistent:
+        align = MultipleSeqAlignment(records)
+        # Check matches the header:
+        if align.get_alignment_length() != aln_length:
+            raise ValueError(
+                "GCG MSF headers said alignment length %i, but have %i"
+                % (aln_length, align.get_alignment_length())
+            )
+        return align
diff --git a/code/lib/Bio/AlignIO/NexusIO.py b/code/lib/Bio/AlignIO/NexusIO.py
new file mode 100644
index 0000000..2c97e2e
--- /dev/null
+++ b/code/lib/Bio/AlignIO/NexusIO.py
@@ -0,0 +1,166 @@
+# Copyright 2008-2010, 2012-2014, 2016-2017 by Peter Cock. All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Bio.AlignIO support for the "nexus" file format.
+
+You are expected to use this module via the Bio.AlignIO functions (or the
+Bio.SeqIO functions if you want to work directly with the gapped sequences).
+
+See also the Bio.Nexus module (which this code calls internally),
+as this offers more than just accessing the alignment or its
+sequences as SeqRecord objects.
+"""
+from Bio.Align import MultipleSeqAlignment
+from Bio.AlignIO.Interfaces import AlignmentWriter
+from Bio.Nexus import Nexus
+from Bio.SeqRecord import SeqRecord
+
+
+# You can get a couple of example files here:
+# http://www.molecularevolution.org/resources/fileformats/
+
+
+# This is a generator function!
+def NexusIterator(handle, seq_count=None):
+    """Return SeqRecord objects from a Nexus file.
+
+    This uses the Bio.Nexus module to do the hard work.
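+
+    A typical indirect call (the file name here is hypothetical) would be::
+
+        from Bio import AlignIO
+        alignment = AlignIO.read("example.nex", "nexus")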
+ + You are expected to call this function via Bio.SeqIO or Bio.AlignIO + (and not use it directly). + + NOTE - We only expect ONE alignment matrix per Nexus file, + meaning this iterator will only yield one MultipleSeqAlignment. + """ + n = Nexus.Nexus(handle) + if not n.matrix: + # No alignment found + return + + # Bio.Nexus deals with duplicated names by adding a '.copy' suffix. + # The original names and the modified names are kept in these two lists: + assert len(n.unaltered_taxlabels) == len(n.taxlabels) + + if seq_count and seq_count != len(n.unaltered_taxlabels): + raise ValueError( + "Found %i sequences, but seq_count=%i" + % (len(n.unaltered_taxlabels), seq_count) + ) + + # TODO - Can we extract any annotation too? + if n.datatype in ("dna", "nucleotide"): + annotations = {"molecule_type": "DNA"} + elif n.datatype == "rna": + annotations = {"molecule_type": "RNA"} + elif n.datatype == "protein": + annotations = {"molecule_type": "protein"} + else: + annotations = None + records = ( + SeqRecord( + n.matrix[new_name], + id=new_name, + name=old_name, + description="", + annotations=annotations, + ) + for old_name, new_name in zip(n.unaltered_taxlabels, n.taxlabels) + ) + # All done + yield MultipleSeqAlignment(records) + + +class NexusWriter(AlignmentWriter): + """Nexus alignment writer. + + Note that Nexus files are only expected to hold ONE alignment + matrix. + + You are expected to call this class via the Bio.AlignIO.write() or + Bio.SeqIO.write() functions. + """ + + def write_file(self, alignments): + """Use this to write an entire file containing the given alignments. + + Arguments: + - alignments - A list or iterator returning MultipleSeqAlignment objects. + This should hold ONE and only one alignment. + + """ + align_iter = iter(alignments) # Could have been a list + try: + alignment = next(align_iter) + except StopIteration: + # Nothing to write! + return 0 + + # Check there is only one alignment... + try: + next(align_iter) + raise ValueError("We can only write one Alignment to a Nexus file.") + except StopIteration: + pass + + # Good. Actually write the single alignment, + self.write_alignment(alignment) + return 1 # we only support writing one alignment! + + def write_alignment(self, alignment, interleave=None): + """Write an alignment to file. + + Creates an empty Nexus object, adds the sequences + and then gets Nexus to prepare the output. + Default interleave behaviour: Interleave if columns > 1000 + --> Override with interleave=[True/False] + """ + if len(alignment) == 0: + raise ValueError("Must have at least one sequence") + columns = alignment.get_alignment_length() + if columns == 0: + raise ValueError("Non-empty sequences are required") + datatype = self._classify_mol_type_for_nexus(alignment) + minimal_record = ( + "#NEXUS\nbegin data; dimensions ntax=0 nchar=0; format datatype=%s; end;" + % datatype + ) + n = Nexus.Nexus(minimal_record) + for record in alignment: + # Sanity test sequences (should this be even stricter?) 
+ if datatype == "dna" and "U" in record.seq: + raise ValueError(f"{record.id} contains U, but DNA alignment") + elif datatype == "rna" and "T" in record.seq: + raise ValueError(f"{record.id} contains T, but RNA alignment") + n.add_sequence(record.id, str(record.seq)) + + # Note: MrBayes may choke on large alignments if not interleaved + if interleave is None: + interleave = columns > 1000 + n.write_nexus_data(self.handle, interleave=interleave) + + def _classify_mol_type_for_nexus(self, alignment): + """Return 'protein', 'dna', or 'rna' based on records' molecule type (PRIVATE). + + All the records must have a molecule_type annotation, and they must + agree. + + Raises an exception if this is not possible. + """ + values = {_.annotations.get("molecule_type", None) for _ in alignment} + if all(_ and "DNA" in _ for _ in values): + return "dna" # could have been a mix of "DNA" and "gDNA" + elif all(_ and "RNA" in _ for _ in values): + return "rna" # could have been a mix of "RNA" and "mRNA" + elif all(_ and "protein" in _ for _ in values): + return "protein" + else: + raise ValueError("Need the molecule type to be defined") + + +if __name__ == "__main__": + from Bio._utils import run_doctest + + run_doctest(verbose=0) diff --git a/code/lib/Bio/AlignIO/PhylipIO.py b/code/lib/Bio/AlignIO/PhylipIO.py new file mode 100644 index 0000000..cc3f665 --- /dev/null +++ b/code/lib/Bio/AlignIO/PhylipIO.py @@ -0,0 +1,454 @@ +# Copyright 2006-2016 by Peter Cock. All rights reserved. +# Revisions copyright 2011 Brandon Invergo. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +"""AlignIO support for "phylip" format from Joe Felsenstein's PHYLIP tools. + +You are expected to use this module via the Bio.AlignIO functions (or the +Bio.SeqIO functions if you want to work directly with the gapped sequences). + +Support for "relaxed phylip" format is also provided. Relaxed phylip differs +from standard phylip format in the following ways: + + - No whitespace is allowed in the sequence ID. + - No truncation is performed. Instead, sequence IDs are padded to the longest + ID length, rather than 10 characters. A space separates the sequence + identifier from the sequence. + +Relaxed phylip is supported by RAxML and PHYML. + +Note +==== + +In TREE_PUZZLE (Schmidt et al. 2003) and PHYML (Guindon and Gascuel 2003) +a dot/period (".") in a sequence is interpreted as meaning the same +character as in the first sequence. The PHYLIP documentation from 3.3 to 3.69 +http://evolution.genetics.washington.edu/phylip/doc/sequence.html says: + +"a period was also previously allowed but it is no longer allowed, +because it sometimes is used in different senses in other programs" + +Biopython 1.58 or later treats dots/periods in the sequence as invalid, both +for reading and writing. Older versions did nothing special with a dot/period. 
+""" +import string + +from Bio.Align import MultipleSeqAlignment +from Bio.Seq import Seq +from Bio.SeqRecord import SeqRecord + +from .Interfaces import AlignmentIterator +from .Interfaces import SequentialAlignmentWriter + + +_PHYLIP_ID_WIDTH = 10 +_NO_DOTS = "PHYLIP format no longer allows dots in sequence" + + +class PhylipWriter(SequentialAlignmentWriter): + """Phylip alignment writer.""" + + def write_alignment(self, alignment, id_width=_PHYLIP_ID_WIDTH): + """Use this to write (another) single alignment to an open file. + + This code will write interlaced alignments (when the sequences are + longer than 50 characters). + + Note that record identifiers are strictly truncated to id_width, + defaulting to the value required to comply with the PHYLIP standard. + + For more information on the file format, please see: + http://evolution.genetics.washington.edu/phylip/doc/sequence.html + http://evolution.genetics.washington.edu/phylip/doc/main.html#inputfiles + """ + handle = self.handle + + if len(alignment) == 0: + raise ValueError("Must have at least one sequence") + length_of_seqs = alignment.get_alignment_length() + for record in alignment: + if length_of_seqs != len(record.seq): + raise ValueError("Sequences must all be the same length") + if length_of_seqs <= 0: + raise ValueError("Non-empty sequences are required") + + # Check for repeated identifiers... + # Apply this test *after* cleaning the identifiers + names = [] + seqs = [] + for record in alignment: + """ + Quoting the PHYLIP version 3.6 documentation: + + The name should be ten characters in length, filled out to + the full ten characters by blanks if shorter. Any printable + ASCII/ISO character is allowed in the name, except for + parentheses ("(" and ")"), square brackets ("[" and "]"), + colon (":"), semicolon (";") and comma (","). If you forget + to extend the names to ten characters in length by blanks, + the program [i.e. PHYLIP] will get out of synchronization + with the contents of the data file, and an error message will + result. + + Note that Tab characters count as only one character in the + species names. Their inclusion can cause trouble. + """ + name = sanitize_name(record.id, id_width) + if name in names: + raise ValueError( + "Repeated name %r (originally %r), possibly due to truncation" + % (name, record.id) + ) + names.append(name) + sequence = str(record.seq) + if "." in sequence: + # Do this check here (once per record, not once per block) + raise ValueError(_NO_DOTS) + seqs.append(sequence) + + # From experimentation, the use of tabs is not understood by the + # EMBOSS suite. The nature of the expected white space is not + # defined in the PHYLIP documentation, simply "These are in free + # format, separated by blanks". We'll use spaces to keep EMBOSS + # happy. + handle.write(" %i %s\n" % (len(alignment), length_of_seqs)) + block = 0 + while True: + for name, sequence in zip(names, seqs): + if block == 0: + # Write name (truncated/padded to id_width characters) + # Now truncate and right pad to expected length. + handle.write(name[:id_width].ljust(id_width)) + else: + # write indent + handle.write(" " * id_width) + # Write five chunks of ten letters per line... + for chunk in range(0, 5): + i = block * 50 + chunk * 10 + seq_segment = sequence[i : i + 10] + # TODO - Force any gaps to be '-' character? + # TODO - How to cope with '?' or '.' in the sequence? 
+ handle.write(" %s" % seq_segment) + if i + 10 > length_of_seqs: + break + handle.write("\n") + block += 1 + if block * 50 >= length_of_seqs: + break + handle.write("\n") + + +class PhylipIterator(AlignmentIterator): + """Reads a Phylip alignment file returning a MultipleSeqAlignment iterator. + + Record identifiers are limited to at most 10 characters. + + It only copes with interlaced phylip files! Sequential files won't work + where the sequences are split over multiple lines. + + For more information on the file format, please see: + http://evolution.genetics.washington.edu/phylip/doc/sequence.html + http://evolution.genetics.washington.edu/phylip/doc/main.html#inputfiles + """ + + # Default truncation length + id_width = _PHYLIP_ID_WIDTH + + _header = None # for caching lines between __next__ calls + + def _is_header(self, line): + line = line.strip() + parts = [x for x in line.split() if x] + if len(parts) != 2: + return False # First line should have two integers + try: + number_of_seqs = int(parts[0]) + length_of_seqs = int(parts[1]) + return True + except ValueError: + return False # First line should have two integers + + def _split_id(self, line): + """Extract the sequence ID from a Phylip line (PRIVATE). + + Returning a tuple containing: (sequence_id, sequence_residues) + + The first 10 characters in the line are are the sequence id, the + remainder are sequence data. + """ + seq_id = line[: self.id_width].strip() + seq = line[self.id_width :].strip().replace(" ", "") + return seq_id, seq + + def __next__(self): + """Parse the next alignment from the handle.""" + handle = self.handle + + if self._header is None: + line = handle.readline() + else: + # Header we saved from when we were parsing + # the previous alignment. + line = self._header + self._header = None + + if not line: + raise StopIteration + line = line.strip() + parts = [x for x in line.split() if x] + if len(parts) != 2: + raise ValueError("First line should have two integers") + try: + number_of_seqs = int(parts[0]) + length_of_seqs = int(parts[1]) + except ValueError: + raise ValueError("First line should have two integers") from None + + assert self._is_header(line) + + if ( + self.records_per_alignment is not None + and self.records_per_alignment != number_of_seqs + ): + raise ValueError( + "Found %i records in this alignment, told to expect %i" + % (number_of_seqs, self.records_per_alignment) + ) + + ids = [] + seqs = [] + + # By default, expects STRICT truncation / padding to 10 characters. + # Does not require any whitespace between name and seq. + for i in range(number_of_seqs): + line = handle.readline().rstrip() + sequence_id, s = self._split_id(line) + ids.append(sequence_id) + if "." in s: + raise ValueError(_NO_DOTS) + seqs.append([s]) + + # Look for further blocks + line = "" + while True: + # Skip any blank lines between blocks... + while "" == line.strip(): + line = handle.readline() + if not line: + break # end of file + if not line: + break # end of file + + if self._is_header(line): + # Looks like the start of a concatenated alignment + self._header = line + break + + # print("New block...") + for i in range(number_of_seqs): + s = line.strip().replace(" ", "") + if "." 
in s: + raise ValueError(_NO_DOTS) + seqs[i].append(s) + line = handle.readline() + if (not line) and i + 1 < number_of_seqs: + raise ValueError("End of file mid-block") + if not line: + break # end of file + + records = ( + SeqRecord(Seq("".join(s)), id=i, name=i, description=i) + for (i, s) in zip(ids, seqs) + ) + return MultipleSeqAlignment(records) + + +# Relaxed Phylip +class RelaxedPhylipWriter(PhylipWriter): + """Relaxed Phylip format writer.""" + + def write_alignment(self, alignment): + """Write a relaxed phylip alignment.""" + # Check inputs + for name in (s.id.strip() for s in alignment): + if any(c in name for c in string.whitespace): + raise ValueError("Whitespace not allowed in identifier: %s" % name) + + # Calculate a truncation length - maximum length of sequence ID plus a + # single character for padding + # If no sequences, set id_width to 1. super(...) call will raise a + # ValueError + if len(alignment) == 0: + id_width = 1 + else: + id_width = max(len(s.id.strip()) for s in alignment) + 1 + super().write_alignment(alignment, id_width) + + +class RelaxedPhylipIterator(PhylipIterator): + """Relaxed Phylip format Iterator.""" + + def _split_id(self, line): + """Extract the sequence ID from a Phylip line (PRIVATE). + + Returns a tuple containing: (sequence_id, sequence_residues) + + For relaxed format split at the first whitespace character. + """ + seq_id, sequence = line.split(None, 1) + sequence = sequence.strip().replace(" ", "") + return seq_id, sequence + + +class SequentialPhylipWriter(SequentialAlignmentWriter): + """Sequential Phylip format Writer.""" + + def write_alignment(self, alignment, id_width=_PHYLIP_ID_WIDTH): + """Write a Phylip alignment to the handle.""" + handle = self.handle + + if len(alignment) == 0: + raise ValueError("Must have at least one sequence") + length_of_seqs = alignment.get_alignment_length() + for record in alignment: + if length_of_seqs != len(record.seq): + raise ValueError("Sequences must all be the same length") + if length_of_seqs <= 0: + raise ValueError("Non-empty sequences are required") + + # Check for repeated identifiers... + # Apply this test *after* cleaning the identifiers + names = [] + for record in alignment: + # Either remove the banned characters, or map them to something + # else like an underscore "_" or pipe "|" character... + name = sanitize_name(record.id, id_width) + if name in names: + raise ValueError( + "Repeated name %r (originally %r), possibly due to truncation" + % (name, record.id) + ) + names.append(name) + + # From experimentation, the use of tabs is not understood by the + # EMBOSS suite. The nature of the expected white space is not + # defined in the PHYLIP documentation, simply "These are in free + # format, separated by blanks". We'll use spaces to keep EMBOSS + # happy. + handle.write(" %i %s\n" % (len(alignment), length_of_seqs)) + for name, record in zip(names, alignment): + sequence = str(record.seq) + if "." in sequence: + raise ValueError(_NO_DOTS) + handle.write(name[:id_width].ljust(id_width)) + # Write the entire sequence to one line (see sequential format + # notes in the SequentialPhylipIterator docstring + handle.write(sequence) + handle.write("\n") + + +class SequentialPhylipIterator(PhylipIterator): + """Sequential Phylip format Iterator. + + The sequential format carries the same restrictions as the normal + interleaved one, with the difference being that the sequences are listed + sequentially, each sequence written in its entirety before the start of + the next. 
According to the PHYLIP documentation for input file + formatting, newlines and spaces may optionally be entered at any point + in the sequences. + """ + + _header = None # for caching lines between __next__ calls + + def __next__(self): + """Parse the next alignment from the handle.""" + handle = self.handle + + if self._header is None: + line = handle.readline() + else: + # Header we saved from when we were parsing + # the previous alignment. + line = self._header + self._header = None + + if not line: + raise StopIteration + line = line.strip() + parts = [x for x in line.split() if x] + if len(parts) != 2: + raise ValueError("First line should have two integers") + try: + number_of_seqs = int(parts[0]) + length_of_seqs = int(parts[1]) + except ValueError: + raise ValueError("First line should have two integers") from None + + assert self._is_header(line) + + if ( + self.records_per_alignment is not None + and self.records_per_alignment != number_of_seqs + ): + raise ValueError( + "Found %i records in this alignment, told to expect %i" + % (number_of_seqs, self.records_per_alignment) + ) + + ids = [] + seqs = [] + + # By default, expects STRICT truncation / padding to 10 characters. + # Does not require any whitespace between name and seq. + for i in range(number_of_seqs): + line = handle.readline().rstrip() + sequence_id, s = self._split_id(line) + ids.append(sequence_id) + while len(s) < length_of_seqs: + # The sequence may be split into multiple lines + line = handle.readline().strip() + if not line: + break + if line == "": + continue + s = "".join([s, line.strip().replace(" ", "")]) + if len(s) > length_of_seqs: + raise ValueError( + "Found a record of length %i, " + "should be %i" % (len(s), length_of_seqs) + ) + if "." in s: + raise ValueError(_NO_DOTS) + seqs.append(s) + while True: + # Find other alignments in the file + line = handle.readline() + if not line: + break + if self._is_header(line): + self._header = line + break + + records = ( + SeqRecord(Seq(s), id=i, name=i, description=i) for (i, s) in zip(ids, seqs) + ) + return MultipleSeqAlignment(records) + + +def sanitize_name(name, width=None): + """Sanitise sequence identifier for output. + + Removes the banned characters "[]()" and replaces the characters ":;" + with "|". The name is truncated to "width" characters if specified. + """ + name = name.strip() + for char in "[](),": + name = name.replace(char, "") + for char in ":;": + name = name.replace(char, "|") + if width is not None: + name = name[:width] + return name diff --git a/code/lib/Bio/AlignIO/StockholmIO.py b/code/lib/Bio/AlignIO/StockholmIO.py new file mode 100644 index 0000000..386e762 --- /dev/null +++ b/code/lib/Bio/AlignIO/StockholmIO.py @@ -0,0 +1,630 @@ +# Copyright 2006-2016 by Peter Cock. All rights reserved. +# Revisions copyright 2015 by Ben Woodcroft. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +"""Bio.AlignIO support for "stockholm" format (used in the PFAM database). + +You are expected to use this module via the Bio.AlignIO functions (or the +Bio.SeqIO functions if you want to work directly with the gapped sequences). + +For example, consider a Stockholm alignment file containing the following:: + + # STOCKHOLM 1.0 + #=GC SS_cons .................<<<<<<<<...<<<<<<<........>>>>>>>.. 
+    AP001509.1          UUAAUCGAGCUCAACACUCUUCGUAUAUCCUC-UCAAUAUGG-GAUGAGGGU
+    #=GR AP001509.1 SS  -----------------<<<<<<<<---..<<-<<-------->>->>..--
+    AE007476.1          AAAAUUGAAUAUCGUUUUACUUGUUUAU-GUCGUGAAU-UGG-CACGA-CGU
+    #=GR AE007476.1 SS  -----------------<<<<<<<<-----<<.<<-------->>.>>----
+
+    #=GC SS_cons        ......<<<<<<<.......>>>>>>>..>>>>>>>>...............
+    AP001509.1          CUCUAC-AGGUA-CCGUAAA-UACCUAGCUACGAAAAGAAUGCAGUUAAUGU
+    #=GR AP001509.1 SS  -------<<<<<--------->>>>>--->>>>>>>>---------------
+    AE007476.1          UUCUACAAGGUG-CCGG-AA-CACCUAACAAUAAGUAAGUCAGCAGUGAGAU
+    #=GR AE007476.1 SS  ------.<<<<<--------->>>>>.-->>>>>>>>---------------
+    //
+
+This is a single multiple sequence alignment, so you would probably load this
+using the Bio.AlignIO.read() function:
+
+    >>> from Bio import AlignIO
+    >>> align = AlignIO.read("Stockholm/simple.sth", "stockholm")
+    >>> print(align)
+    Alignment with 2 rows and 104 columns
+    UUAAUCGAGCUCAACACUCUUCGUAUAUCCUC-UCAAUAUGG-G...UGU AP001509.1
+    AAAAUUGAAUAUCGUUUUACUUGUUUAU-GUCGUGAAU-UGG-C...GAU AE007476.1
+    >>> for record in align:
+    ...     print("%s %i" % (record.id, len(record)))
+    AP001509.1 104
+    AE007476.1 104
+
+In addition to the sequences themselves, this example alignment also includes
+some GR lines for the secondary structure of the sequences. These are
+strings, with one character for each letter in the associated sequence:
+
+    >>> for record in align:
+    ...     print(record.id)
+    ...     print(record.seq)
+    ...     print(record.letter_annotations['secondary_structure'])
+    AP001509.1
+    UUAAUCGAGCUCAACACUCUUCGUAUAUCCUC-UCAAUAUGG-GAUGAGGGUCUCUAC-AGGUA-CCGUAAA-UACCUAGCUACGAAAAGAAUGCAGUUAAUGU
+    -----------------<<<<<<<<---..<<-<<-------->>->>..---------<<<<<--------->>>>>--->>>>>>>>---------------
+    AE007476.1
+    AAAAUUGAAUAUCGUUUUACUUGUUUAU-GUCGUGAAU-UGG-CACGA-CGUUUCUACAAGGUG-CCGG-AA-CACCUAACAAUAAGUAAGUCAGCAGUGAGAU
+    -----------------<<<<<<<<-----<<.<<-------->>.>>----------.<<<<<--------->>>>>.-->>>>>>>>---------------
+
+Any general annotation for each row is recorded in the SeqRecord's annotations
+dictionary. Any per-column annotation for the entire alignment is in the
+alignment's column annotations dictionary, such as the secondary structure
+consensus in this example:
+
+    >>> sorted(align.column_annotations.keys())
+    ['secondary_structure']
+    >>> align.column_annotations["secondary_structure"]
+    '.................<<<<<<<<...<<<<<<<........>>>>>>>........<<<<<<<.......>>>>>>>..>>>>>>>>...............'
+
+You can output this alignment in many different file formats
+using Bio.AlignIO.write(), or the MultipleSeqAlignment object's format method:
+
+    >>> print(format(align, "fasta"))
+    >AP001509.1
+    UUAAUCGAGCUCAACACUCUUCGUAUAUCCUC-UCAAUAUGG-GAUGAGGGUCUCUAC-A
+    GGUA-CCGUAAA-UACCUAGCUACGAAAAGAAUGCAGUUAAUGU
+    >AE007476.1
+    AAAAUUGAAUAUCGUUUUACUUGUUUAU-GUCGUGAAU-UGG-CACGA-CGUUUCUACAA
+    GGUG-CCGG-AA-CACCUAACAAUAAGUAAGUCAGCAGUGAGAU
+
+
+Most output formats won't be able to hold the annotation possible in a
+Stockholm file:
+
+    >>> print(format(align, "stockholm"))
+    # STOCKHOLM 1.0
+    #=GF SQ 2
+    AP001509.1 UUAAUCGAGCUCAACACUCUUCGUAUAUCCUC-UCAAUAUGG-GAUGAGGGUCUCUAC-AGGUA-CCGUAAA-UACCUAGCUACGAAAAGAAUGCAGUUAAUGU
+    #=GS AP001509.1 AC AP001509.1
+    #=GS AP001509.1 DE AP001509.1
+    #=GR AP001509.1 SS -----------------<<<<<<<<---..<<-<<-------->>->>..---------<<<<<--------->>>>>--->>>>>>>>---------------
+    AE007476.1 AAAAUUGAAUAUCGUUUUACUUGUUUAU-GUCGUGAAU-UGG-CACGA-CGUUUCUACAAGGUG-CCGG-AA-CACCUAACAAUAAGUAAGUCAGCAGUGAGAU
+    #=GS AE007476.1 AC AE007476.1
+    #=GS AE007476.1 DE AE007476.1
+    #=GR AE007476.1 SS -----------------<<<<<<<<-----<<.<<-------->>.>>----------.<<<<<--------->>>>>.-->>>>>>>>---------------
+    #=GC SS_cons .................<<<<<<<<...<<<<<<<........>>>>>>>........<<<<<<<.......>>>>>>>..>>>>>>>>...............
+    //
+
+
+Note that when writing Stockholm files, AlignIO does not break long sequences
+up and interleave them (as in the input file shown above). The standard
+allows this simpler layout, and it is more likely to be understood by other
+tools.
+
+Finally, as an aside, it can sometimes be useful to use Bio.SeqIO.parse() to
+iterate over the alignment rows as SeqRecord objects - rather than working
+with Alignment objects.
+
+    >>> from Bio import SeqIO
+    >>> for record in SeqIO.parse("Stockholm/simple.sth", "stockholm"):
+    ...     print(record.id)
+    ...     print(record.seq)
+    ...     print(record.letter_annotations['secondary_structure'])
+    AP001509.1
+    UUAAUCGAGCUCAACACUCUUCGUAUAUCCUC-UCAAUAUGG-GAUGAGGGUCUCUAC-AGGUA-CCGUAAA-UACCUAGCUACGAAAAGAAUGCAGUUAAUGU
+    -----------------<<<<<<<<---..<<-<<-------->>->>..---------<<<<<--------->>>>>--->>>>>>>>---------------
+    AE007476.1
+    AAAAUUGAAUAUCGUUUUACUUGUUUAU-GUCGUGAAU-UGG-CACGA-CGUUUCUACAAGGUG-CCGG-AA-CACCUAACAAUAAGUAAGUCAGCAGUGAGAU
+    -----------------<<<<<<<<-----<<.<<-------->>.>>----------.<<<<<--------->>>>>.-->>>>>>>>---------------
+
+Remember that if you slice a SeqRecord, the per-letter-annotations like the
+secondary structure string here, are also sliced:
+
+    >>> sub_record = record[10:20]
+    >>> print(sub_record.seq)
+    AUCGUUUUAC
+    >>> print(sub_record.letter_annotations['secondary_structure'])
+    -------<<<
+
+Likewise with the alignment object, as long as you are not dropping any rows,
+slicing specific columns of an alignment will slice any per-column-annotations:
+
+    >>> align.column_annotations["secondary_structure"]
+    '.................<<<<<<<<...<<<<<<<........>>>>>>>........<<<<<<<.......>>>>>>>..>>>>>>>>...............'
+ >>> part_align = align[:,10:20] + >>> part_align.column_annotations["secondary_structure"] + '.......<<<' + +You can also see this in the Stockholm output of this partial-alignment: + + >>> print(format(part_align, "stockholm")) + # STOCKHOLM 1.0 + #=GF SQ 2 + AP001509.1 UCAACACUCU + #=GS AP001509.1 AC AP001509.1 + #=GS AP001509.1 DE AP001509.1 + #=GR AP001509.1 SS -------<<< + AE007476.1 AUCGUUUUAC + #=GS AE007476.1 AC AE007476.1 + #=GS AE007476.1 DE AE007476.1 + #=GR AE007476.1 SS -------<<< + #=GC SS_cons .......<<< + // + + +""" +from collections import OrderedDict + +from Bio.Align import MultipleSeqAlignment +from Bio.Seq import Seq +from Bio.SeqRecord import SeqRecord + +from .Interfaces import AlignmentIterator +from .Interfaces import SequentialAlignmentWriter + + +class StockholmWriter(SequentialAlignmentWriter): + """Stockholm/PFAM alignment writer.""" + + # These dictionaries should be kept in sync with those + # defined in the StockholmIterator class. + pfam_gr_mapping = { + "secondary_structure": "SS", + "surface_accessibility": "SA", + "transmembrane": "TM", + "posterior_probability": "PP", + "ligand_binding": "LI", + "active_site": "AS", + "intron": "IN", + } + # These GC mappings are in addition to *_cons in GR mapping: + pfam_gc_mapping = {"reference_annotation": "RF", "model_mask": "MM"} + # Following dictionary deliberately does not cover AC, DE or DR + pfam_gs_mapping = {"organism": "OS", "organism_classification": "OC", "look": "LO"} + + def write_alignment(self, alignment): + """Use this to write (another) single alignment to an open file. + + Note that sequences and their annotation are recorded + together (rather than having a block of annotation followed + by a block of aligned sequences). + """ + count = len(alignment) + + self._length_of_sequences = alignment.get_alignment_length() + self._ids_written = [] + + if count == 0: + raise ValueError("Must have at least one sequence") + if self._length_of_sequences == 0: + raise ValueError("Non-empty sequences are required") + + self.handle.write("# STOCKHOLM 1.0\n") + self.handle.write("#=GF SQ %i\n" % count) + for record in alignment: + self._write_record(record) + # This shouldn't be None... but just in case, + if alignment.column_annotations: + for k, v in sorted(alignment.column_annotations.items()): + if k in self.pfam_gc_mapping: + self.handle.write("#=GC %s %s\n" % (self.pfam_gc_mapping[k], v)) + elif k in self.pfam_gr_mapping: + self.handle.write( + "#=GC %s %s\n" % (self.pfam_gr_mapping[k] + "_cons", v) + ) + else: + # It doesn't follow the PFAM standards, but should we record + # this data anyway? 
+                    pass
+        self.handle.write("//\n")
+
+    def _write_record(self, record):
+        """Write a single SeqRecord to the file (PRIVATE)."""
+        if self._length_of_sequences != len(record.seq):
+            raise ValueError("Sequences must all be the same length")
+
+        # For the case of Stockholm to Stockholm, try and use record.name
+        seq_name = record.id
+        if record.name is not None:
+            if "accession" in record.annotations:
+                if record.id == record.annotations["accession"]:
+                    seq_name = record.name
+
+        # In the Stockholm file format, spaces are not allowed in the id
+        seq_name = seq_name.replace(" ", "_")
+
+        if "start" in record.annotations and "end" in record.annotations:
+            suffix = "/%s-%s" % (
+                record.annotations["start"],
+                record.annotations["end"],
+            )
+            if seq_name[-len(suffix) :] != suffix:
+                seq_name = "%s/%s-%s" % (
+                    seq_name,
+                    record.annotations["start"],
+                    record.annotations["end"],
+                )
+
+        if seq_name in self._ids_written:
+            raise ValueError("Duplicate record identifier: %s" % seq_name)
+        self._ids_written.append(seq_name)
+        self.handle.write("%s %s\n" % (seq_name, record.seq))
+
+        # The recommended placement for GS lines (per sequence annotation)
+        # is above the alignment (as a header block) or just below the
+        # corresponding sequence.
+        #
+        # The recommended placement for GR lines (per sequence per column
+        # annotation such as secondary structure) is just below the
+        # corresponding sequence.
+        #
+        # We put both just below the corresponding sequence as this allows
+        # us to write the file using a single pass through the records.
+
+        # AC = Accession
+        if "accession" in record.annotations:
+            self.handle.write(
+                "#=GS %s AC %s\n"
+                % (seq_name, self.clean(record.annotations["accession"]))
+            )
+        elif record.id:
+            self.handle.write("#=GS %s AC %s\n" % (seq_name, self.clean(record.id)))
+
+        # DE = description
+        if record.description:
+            self.handle.write(
+                "#=GS %s DE %s\n" % (seq_name, self.clean(record.description))
+            )
+
+        # DR = database links
+        for xref in record.dbxrefs:
+            self.handle.write("#=GS %s DR %s\n" % (seq_name, self.clean(xref)))
+
+        # GS = other per sequence annotation
+        for key, value in record.annotations.items():
+            if key in self.pfam_gs_mapping:
+                data = self.clean(str(value))
+                if data:
+                    self.handle.write(
+                        "#=GS %s %s %s\n"
+                        % (seq_name, self.clean(self.pfam_gs_mapping[key]), data)
+                    )
+            else:
+                # It doesn't follow the PFAM standards, but should we record
+                # this data anyway?
+                pass
+
+        # GR = per row per column sequence annotation
+        for key, value in record.letter_annotations.items():
+            if key in self.pfam_gr_mapping and len(str(value)) == len(record.seq):
+                data = self.clean(str(value))
+                if data:
+                    self.handle.write(
+                        "#=GR %s %s %s\n"
+                        % (seq_name, self.clean(self.pfam_gr_mapping[key]), data)
+                    )
+            else:
+                # It doesn't follow the PFAM standards, but should we record
+                # this data anyway?
+                pass
+
+
+class StockholmIterator(AlignmentIterator):
+    """Loads a Stockholm file from PFAM into MultipleSeqAlignment objects.
+
+    The file may contain multiple concatenated alignments, which are loaded
+    and returned incrementally.
+
+    This parser will detect if the Stockholm file follows the PFAM
+    conventions for sequence specific meta-data (lines starting #=GS
+    and #=GR) and populates the SeqRecord fields accordingly.
+
+    Any annotation which does not follow the PFAM conventions is currently
+    ignored.
+
+    If an accession is provided for an entry in the meta data, IT WILL NOT
+    be used as the record.id (it will be recorded in the record's
+    annotations). This is because some files have (sub) sequences from
+    different parts of the same accession (differentiated by different
+    start-end positions).
+
+    Wrap-around alignments are not supported - each sequence must be on
+    a single line. However, interlaced sequences should work.
+
+    For more information on the file format, please see:
+    http://sonnhammer.sbc.su.se/Stockholm.html
+    https://en.wikipedia.org/wiki/Stockholm_format
+    http://bioperl.org/formats/alignment_formats/Stockholm_multiple_alignment_format.html
+
+    For consistency with BioPerl and EMBOSS we call this the "stockholm"
+    format.
+    """
+
+    # These dictionaries should be kept in sync with those
+    # defined in the PfamStockholmWriter class.
+    pfam_gr_mapping = {
+        "SS": "secondary_structure",
+        "SA": "surface_accessibility",
+        "TM": "transmembrane",
+        "PP": "posterior_probability",
+        "LI": "ligand_binding",
+        "AS": "active_site",
+        "IN": "intron",
+    }
+    # These GC mappings are in addition to *_cons in GR mapping:
+    pfam_gc_mapping = {"RF": "reference_annotation", "MM": "model_mask"}
+    # Following dictionary deliberately does not cover AC, DE or DR
+    pfam_gs_mapping = {"OS": "organism", "OC": "organism_classification", "LO": "look"}
+
+    _header = None  # for caching lines between __next__ calls
+
+    def __next__(self):
+        """Parse the next alignment from the handle."""
+        handle = self.handle
+
+        if self._header is None:
+            line = handle.readline()
+        else:
+            # Header we saved from when we were parsing
+            # the previous alignment.
+            line = self._header
+            self._header = None
+
+        if not line:
+            # Empty file - just give up.
+            raise StopIteration
+        if line.strip() != "# STOCKHOLM 1.0":
+            raise ValueError("Did not find STOCKHOLM header")
+
+        # Note: If this file follows the PFAM conventions, there should be
+        # a line containing the number of sequences, e.g. "#=GF SQ 67"
+        # We do not check for this - perhaps we should, and verify that
+        # if present it agrees with our parsing.
+
+        seqs = {}
+        ids = OrderedDict()  # Really only need an OrderedSet, but python lacks this
+        gs = {}
+        gr = {}
+        gf = {}
+        gc = {}
+        passed_end_alignment = False
+        while True:
+            line = handle.readline()
+            if not line:
+                break  # end of file
+            line = line.strip()  # remove trailing \n
+            if line == "# STOCKHOLM 1.0":
+                self._header = line
+                break
+            elif line == "//":
+                # The "//" line indicates the end of the alignment.
+                # There may still be more meta-data
+                passed_end_alignment = True
+            elif line == "":
+                # blank line, ignore
+                pass
+            elif line[0] != "#":
+                # Sequence
+                # Format: "<seqname> <sequence>"
+                assert not passed_end_alignment
+                parts = [x.strip() for x in line.split(" ", 1)]
+                if len(parts) != 2:
+                    # This might be someone attempting to store a zero length sequence?
+                    raise ValueError(
+                        "Could not split line into identifier and sequence:\n" + line
+                    )
+                seq_id, seq = parts
+                if seq_id not in ids:
+                    ids[seq_id] = True
+                seqs.setdefault(seq_id, "")
+                seqs[seq_id] += seq.replace(".", "-")
+            elif len(line) >= 5:
+                # Comment line or meta-data
+                if line[:5] == "#=GF ":
+                    # Generic per-File annotation, free text
+                    # Format: "#=GF <feature> <free text>"
+                    feature, text = line[5:].strip().split(None, 1)
+                    # Each feature key could be used more than once,
+                    # so store the entries as a list of strings.
+                    if feature not in gf:
+                        gf[feature] = [text]
+                    else:
+                        gf[feature].append(text)
+                elif line[:5] == "#=GC ":
+                    # Generic per-Column annotation, exactly 1 char per column
+                    # Format: "#=GC <feature> <exactly 1 char per column>"
+                    feature, text = line[5:].strip().split(None, 2)
+                    if feature not in gc:
+                        gc[feature] = ""
+                    gc[feature] += text.strip()  # append to any previous entry
+                    # Might be interleaved blocks, so can't check length yet
+                elif line[:5] == "#=GS ":
+                    # Generic per-Sequence annotation, free text
+                    # Format: "#=GS <seqname> <feature> <free text>"
+                    try:
+                        seq_id, feature, text = line[5:].strip().split(None, 2)
+                    except ValueError:
+                        # Free text can sometimes be empty, which a one line split throws an error for.
+                        # See https://github.com/biopython/biopython/issues/2982 for more details
+                        seq_id, feature = line[5:].strip().split(None, 1)
+                        text = ""
+                    # if seq_id not in ids:
+                    #    ids.append(seq_id)
+                    if seq_id not in gs:
+                        gs[seq_id] = {}
+                    if feature not in gs[seq_id]:
+                        gs[seq_id][feature] = [text]
+                    else:
+                        gs[seq_id][feature].append(text)
+                elif line[:5] == "#=GR ":
+                    # Generic per-Sequence AND per-Column markup
+                    # Format: "#=GR <seqname> <feature> <exactly 1 char per column>"
+                    seq_id, feature, text = line[5:].strip().split(None, 2)
+                    # if seq_id not in ids:
+                    #    ids.append(seq_id)
+                    if seq_id not in gr:
+                        gr[seq_id] = {}
+                    if feature not in gr[seq_id]:
+                        gr[seq_id][feature] = ""
+                    gr[seq_id][feature] += text.strip()  # append to any previous entry
+                    # Might be interleaved blocks, so can't check length yet
+            # Next line...
+
+        assert len(seqs) <= len(ids)
+        # assert len(gs) <= len(ids)
+        # assert len(gr) <= len(ids)
+
+        self.ids = ids.keys()
+        self.sequences = seqs
+        self.seq_annotation = gs
+        self.seq_col_annotation = gr
+
+        if ids and seqs:
+
+            if (
+                self.records_per_alignment is not None
+                and self.records_per_alignment != len(ids)
+            ):
+                raise ValueError(
+                    "Found %i records in this alignment, told to expect %i"
+                    % (len(ids), self.records_per_alignment)
+                )
+
+            alignment_length = len(list(seqs.values())[0])
+            records = []  # Alignment obj will put them all in a list anyway
+            for seq_id in ids:
+                seq = seqs[seq_id]
+                if alignment_length != len(seq):
+                    raise ValueError(
+                        "Sequences have different lengths, or repeated identifier"
+                    )
+                name, start, end = self._identifier_split(seq_id)
+                record = SeqRecord(
+                    Seq(seq),
+                    id=seq_id,
+                    name=name,
+                    description=seq_id,
+                    annotations={"accession": name},
+                )
+                # Accession will be overridden by _populate_meta_data if an explicit
+                # accession is provided:
+                record.annotations["accession"] = name
+
+                if start is not None:
+                    record.annotations["start"] = start
+                if end is not None:
+                    record.annotations["end"] = end
+
+                self._populate_meta_data(seq_id, record)
+                records.append(record)
+            for k, v in gc.items():
+                if len(v) != alignment_length:
+                    raise ValueError(
+                        "%s length %i, expected %i" % (k, len(v), alignment_length)
+                    )
+            alignment = MultipleSeqAlignment(records)
+
+            for k, v in sorted(gc.items()):
+                if k in self.pfam_gc_mapping:
+                    alignment.column_annotations[self.pfam_gc_mapping[k]] = v
+                elif k.endswith("_cons") and k[:-5] in self.pfam_gr_mapping:
+                    alignment.column_annotations[self.pfam_gr_mapping[k[:-5]]] = v
+                else:
+                    # Ignore it?
+                    alignment.column_annotations["GC:" + k] = v
+
+            # TODO - Introduce an annotated alignment class?
+            # For now, store the annotation in a new private property:
+            alignment._annotations = gr
+
+            return alignment
+        else:
+            raise StopIteration
+
+    def _identifier_split(self, identifier):
+        """Return (name, start, end) string tuple from an identifier (PRIVATE)."""
+        if "/" in identifier:
+            name, start_end = identifier.rsplit("/", 1)
+            if start_end.count("-") == 1:
+                try:
+                    start, end = start_end.split("-")
+                    return name, int(start), int(end)
+                except ValueError:
+                    # Non-integers after final '/' - fall through
+                    pass
+        return identifier, None, None
+
+    def _get_meta_data(self, identifier, meta_dict):
+        """Take an identifier and return a dict of all meta-data matching it (PRIVATE).
+
+        For example, given "Q9PN73_CAMJE/149-220" will return all matches to
+        this or "Q9PN73_CAMJE", which is the identifier without its /start-end
+        suffix.
+
+        In the example below, the suffix is required to match the AC, but must
+        be removed to match the OS and OC meta-data::
+
+            # STOCKHOLM 1.0
+            #=GS Q9PN73_CAMJE/149-220  AC Q9PN73
+            ...
+            Q9PN73_CAMJE/149-220  NKA...
+            ...
+            #=GS Q9PN73_CAMJE  OS Campylobacter jejuni
+            #=GS Q9PN73_CAMJE  OC Bacteria
+
+        This function will return an empty dictionary if no data is found.
+        """
+        name, start, end = self._identifier_split(identifier)
+        if name == identifier:
+            identifier_keys = [identifier]
+        else:
+            identifier_keys = [identifier, name]
+        answer = {}
+        for identifier_key in identifier_keys:
+            try:
+                for feature_key in meta_dict[identifier_key]:
+                    answer[feature_key] = meta_dict[identifier_key][feature_key]
+            except KeyError:
+                pass
+        return answer
+
+    def _populate_meta_data(self, identifier, record):
+        """Add meta-data to a SeqRecord's annotations dictionary (PRIVATE).
+
+        This function applies the PFAM conventions.
+        """
+        seq_data = self._get_meta_data(identifier, self.seq_annotation)
+        for feature in seq_data:
+            # Note this dictionary contains lists!
+            if feature == "AC":  # ACcession number
+                assert len(seq_data[feature]) == 1
+                record.annotations["accession"] = seq_data[feature][0]
+            elif feature == "DE":  # DEscription
+                record.description = "\n".join(seq_data[feature])
+            elif feature == "DR":  # Database Reference
+                # Should we try and parse the strings?
+                record.dbxrefs = seq_data[feature]
+            elif feature in self.pfam_gs_mapping:
+                record.annotations[self.pfam_gs_mapping[feature]] = ", ".join(
+                    seq_data[feature]
+                )
+            else:
+                # Ignore it?
+                record.annotations["GS:" + feature] = ", ".join(seq_data[feature])
+
+        # Now record the per-letter-annotations
+        seq_col_data = self._get_meta_data(identifier, self.seq_col_annotation)
+        for feature in seq_col_data:
+            # Note this dictionary contains strings!
+            if feature in self.pfam_gr_mapping:
+                record.letter_annotations[self.pfam_gr_mapping[feature]] = seq_col_data[
+                    feature
+                ]
+            else:
+                # Ignore it?
+                record.letter_annotations["GR:" + feature] = seq_col_data[feature]
+
+
+if __name__ == "__main__":
+    from Bio._utils import run_doctest
+
+    run_doctest()
diff --git a/code/lib/Bio/AlignIO/__init__.py b/code/lib/Bio/AlignIO/__init__.py
new file mode 100644
index 0000000..fe01f8f
--- /dev/null
+++ b/code/lib/Bio/AlignIO/__init__.py
@@ -0,0 +1,480 @@
+# Copyright 2008-2018 by Peter Cock. All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Multiple sequence alignment input/output as alignment objects.
+
+The Bio.AlignIO interface is deliberately very similar to Bio.SeqIO, and in
+fact the two are connected internally. Both modules use the same set of file
+format names (lower case strings). From the user's perspective, you can read
+in a PHYLIP file containing one or more alignments using Bio.AlignIO, or you
+can read in the sequences within these alignments using Bio.SeqIO.
+
+Bio.AlignIO is also documented at http://biopython.org/wiki/AlignIO and by
+a whole chapter in our tutorial:
+
+* `HTML Tutorial`_
+* `PDF Tutorial`_
+
+.. _`HTML Tutorial`: http://biopython.org/DIST/docs/tutorial/Tutorial.html
+.. _`PDF Tutorial`: http://biopython.org/DIST/docs/tutorial/Tutorial.pdf
+
+Input
+-----
+For the typical special case when your file or handle contains one and only
+one alignment, use the function Bio.AlignIO.read(). This takes an input file
+handle (or in recent versions of Biopython a filename as a string), format
+string and optional number of sequences per alignment. It will return a single
+MultipleSeqAlignment object (or raise an exception if there isn't just one
+alignment):
+
+>>> from Bio import AlignIO
+>>> align = AlignIO.read("Phylip/interlaced.phy", "phylip")
+>>> print(align)
+Alignment with 3 rows and 384 columns
+-----MKVILLFVLAVFTVFVSS---------------RGIPPE...I-- CYS1_DICDI
+MAHARVLLLALAVLATAAVAVASSSSFADSNPIRPVTDRAASTL...VAA ALEU_HORVU
+------MWATLPLLCAGAWLLGV--------PVCGAAELSVNSL...PLV CATH_HUMAN
+
+For the general case, when the handle could contain any number of alignments,
+use the function Bio.AlignIO.parse(...) which takes the same arguments, but
+returns an iterator giving MultipleSeqAlignment objects (typically used in a
+for loop). If you want random access to the alignments by number, turn this
+into a list:
+
+>>> from Bio import AlignIO
+>>> alignments = list(AlignIO.parse("Emboss/needle.txt", "emboss"))
+>>> print(alignments[2])
+Alignment with 2 rows and 120 columns
+-KILIVDDQYGIRILLNEVFNKEGYQTFQAANGLQALDIVTKER...--- ref_rec
+LHIVVVDDDPGTCVYIESVFAELGHTCKSFVRPEAAEEYILTHP...HKE gi|94967506|receiver
+
+Most alignment file formats can be concatenated so as to hold as many
+different multiple sequence alignments as possible. One common example
+is the output of the tool seqboot in the PHYLIP suite. Sometimes there
+can be a file header and footer, as seen in the EMBOSS alignment output.
+
+Output
+------
+Use the function Bio.AlignIO.write(...), which takes a complete set of
+Alignment objects (either as a list, or an iterator), an output file handle
+(or filename in recent versions of Biopython) and of course the file format::
+
+    from Bio import AlignIO
+    alignments = ...
+    count = AlignIO.write(alignments, "example.faa", "fasta")
+
+If using a handle make sure to close it to flush the data to the disk::
+
+    from Bio import AlignIO
+    alignments = ...
+    with open("example.faa", "w") as handle:
+        count = AlignIO.write(alignments, handle, "fasta")
+
+In general, you are expected to call this function once (with all your
+alignments) and then close the file handle. However, for file formats
+like PHYLIP where multiple alignments are stored sequentially (with no file
+header and footer), then multiple calls to the write function should work as
+expected when using handles.
+
+If you are using a filename, the repeated calls to the write functions will
+overwrite the existing file each time.
+
+Conversion
+----------
+The Bio.AlignIO.convert(...) function allows an easy interface for simple
+alignment file format conversions.
Additionally, it may use file format +specific optimisations so this should be the fastest way too. + +In general however, you can combine the Bio.AlignIO.parse(...) function with +the Bio.AlignIO.write(...) function for sequence file conversion. Using +generator expressions provides a memory efficient way to perform filtering or +other extra operations as part of the process. + +File Formats +------------ +When specifying the file format, use lowercase strings. The same format +names are also used in Bio.SeqIO and include the following: + + - clustal - Output from Clustal W or X, see also the module Bio.Clustalw + which can be used to run the command line tool from Biopython. + - emboss - EMBOSS tools' "pairs" and "simple" alignment formats. + - fasta - The generic sequence file format where each record starts with + an identifier line starting with a ">" character, followed by + lines of sequence. + - fasta-m10 - For the pairwise alignments output by Bill Pearson's FASTA + tools when used with the -m 10 command line option for machine + readable output. + - ig - The IntelliGenetics file format, apparently the same as the + MASE alignment format. + - msf - The GCG MSF alignment format, originally from PileUp tool. + - nexus - Output from NEXUS, see also the module Bio.Nexus which can also + read any phylogenetic trees in these files. + - phylip - Interlaced PHYLIP, as used by the PHYLIP tools. + - phylip-sequential - Sequential PHYLIP. + - phylip-relaxed - PHYLIP like format allowing longer names. + - stockholm - A richly annotated alignment file format used by PFAM. + - mauve - Output from progressiveMauve/Mauve + +Note that while Bio.AlignIO can read all the above file formats, it cannot +write to all of them. + +You can also use any file format supported by Bio.SeqIO, such as "fasta" or +"ig" (which are listed above), PROVIDED the sequences in your file are all the +same length. +""" +# TODO +# - define policy on reading aligned sequences with gaps in +# (e.g. - and . characters) +# +# - Can we build the to_alignment(...) functionality +# into the generic Alignment class instead? +# +# - How best to handle unique/non unique record.id when writing. +# For most file formats reading such files is fine; The stockholm +# parser would fail. +# +# - MSF multiple alignment format, aka GCG, aka PileUp format (*.msf) +# http://www.bioperl.org/wiki/MSF_multiple_alignment_format +from Bio.Align import MultipleSeqAlignment +from Bio.File import as_handle + +from . import ClustalIO +from . import EmbossIO +from . import FastaIO +from . import MafIO +from . import MauveIO +from . import MsfIO +from . import NexusIO +from . import PhylipIO +from . import StockholmIO + +# Convention for format names is "mainname-subtype" in lower case. +# Please use the same names as BioPerl and EMBOSS where possible. 
+ +_FormatToIterator = { # "fasta" is done via Bio.SeqIO + "clustal": ClustalIO.ClustalIterator, + "emboss": EmbossIO.EmbossIterator, + "fasta-m10": FastaIO.FastaM10Iterator, + "maf": MafIO.MafIterator, + "mauve": MauveIO.MauveIterator, + "msf": MsfIO.MsfIterator, + "nexus": NexusIO.NexusIterator, + "phylip": PhylipIO.PhylipIterator, + "phylip-sequential": PhylipIO.SequentialPhylipIterator, + "phylip-relaxed": PhylipIO.RelaxedPhylipIterator, + "stockholm": StockholmIO.StockholmIterator, +} + +_FormatToWriter = { # "fasta" is done via Bio.SeqIO + "clustal": ClustalIO.ClustalWriter, + "maf": MafIO.MafWriter, + "mauve": MauveIO.MauveWriter, + "nexus": NexusIO.NexusWriter, + "phylip": PhylipIO.PhylipWriter, + "phylip-sequential": PhylipIO.SequentialPhylipWriter, + "phylip-relaxed": PhylipIO.RelaxedPhylipWriter, + "stockholm": StockholmIO.StockholmWriter, +} + + +def write(alignments, handle, format): + """Write complete set of alignments to a file. + + Arguments: + - alignments - A list (or iterator) of MultipleSeqAlignment objects, + or a single alignment object. + - handle - File handle object to write to, or filename as string + (note older versions of Biopython only took a handle). + - format - lower case string describing the file format to write. + + You should close the handle after calling this function. + + Returns the number of alignments written (as an integer). + """ + from Bio import SeqIO + + # Try and give helpful error messages: + if not isinstance(format, str): + raise TypeError("Need a string for the file format (lower case)") + if not format: + raise ValueError("Format required (lower case string)") + if format != format.lower(): + raise ValueError("Format string '%s' should be lower case" % format) + + if isinstance(alignments, MultipleSeqAlignment): + # This raised an exception in older versions of Biopython + alignments = [alignments] + + with as_handle(handle, "w") as fp: + # Map the file format to a writer class + if format in _FormatToWriter: + writer_class = _FormatToWriter[format] + count = writer_class(fp).write_file(alignments) + elif format in SeqIO._FormatToWriter: + # Exploit the existing SeqIO parser to do the dirty work! + # TODO - Can we make one call to SeqIO.write() and count the alignments? + count = 0 + for alignment in alignments: + if not isinstance(alignment, MultipleSeqAlignment): + raise TypeError( + "Expect a list or iterator of MultipleSeqAlignment " + "objects, got: %r" % alignment + ) + SeqIO.write(alignment, fp, format) + count += 1 + elif format in _FormatToIterator or format in SeqIO._FormatToIterator: + raise ValueError( + "Reading format '%s' is supported, but not writing" % format + ) + else: + raise ValueError("Unknown format '%s'" % format) + + if not isinstance(count, int): + raise RuntimeError( + "Internal error - the underlying %s " + "writer should have returned the alignment count, not %r" % (format, count) + ) + + return count + + +# This is a generator function! +def _SeqIO_to_alignment_iterator(handle, format, seq_count=None): + """Use Bio.SeqIO to create an MultipleSeqAlignment iterator (PRIVATE). + + Arguments: + - handle - handle to the file. + - format - string describing the file format. + - seq_count - Optional integer, number of sequences expected in each + alignment. Recommended for fasta format files. + + If count is omitted (default) then all the sequences in the file are + combined into a single MultipleSeqAlignment. 
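+
+    For example, with seq_count=3 a file of six sequences yields two
+    MultipleSeqAlignment objects of three records each; any leftover
+    partial batch raises a ValueError.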
+ """ + from Bio import SeqIO + + if format not in SeqIO._FormatToIterator: + raise ValueError("Unknown format '%s'" % format) + + if seq_count: + # Use the count to split the records into batches. + seq_record_iterator = SeqIO.parse(handle, format) + + records = [] + for record in seq_record_iterator: + records.append(record) + if len(records) == seq_count: + yield MultipleSeqAlignment(records) + records = [] + if records: + raise ValueError("Check seq_count argument, not enough sequences?") + else: + # Must assume that there is a single alignment using all + # the SeqRecord objects: + records = list(SeqIO.parse(handle, format)) + if records: + yield MultipleSeqAlignment(records) + + +def parse(handle, format, seq_count=None): + """Iterate over an alignment file as MultipleSeqAlignment objects. + + Arguments: + - handle - handle to the file, or the filename as a string + (note older versions of Biopython only took a handle). + - format - string describing the file format. + - seq_count - Optional integer, number of sequences expected in each + alignment. Recommended for fasta format files. + + If you have the file name in a string 'filename', use: + + >>> from Bio import AlignIO + >>> filename = "Emboss/needle.txt" + >>> format = "emboss" + >>> for alignment in AlignIO.parse(filename, format): + ... print("Alignment of length %i" % alignment.get_alignment_length()) + Alignment of length 124 + Alignment of length 119 + Alignment of length 120 + Alignment of length 118 + Alignment of length 125 + + If you have a string 'data' containing the file contents, use:: + + from Bio import AlignIO + from io import StringIO + my_iterator = AlignIO.parse(StringIO(data), format) + + Use the Bio.AlignIO.read() function when you expect a single record only. + """ + from Bio import SeqIO + + # Try and give helpful error messages: + if not isinstance(format, str): + raise TypeError("Need a string for the file format (lower case)") + if not format: + raise ValueError("Format required (lower case string)") + if format != format.lower(): + raise ValueError("Format string '%s' should be lower case" % format) + if seq_count is not None and not isinstance(seq_count, int): + raise TypeError("Need integer for seq_count (sequences per alignment)") + + with as_handle(handle) as fp: + # Map the file format to a sequence iterator: + if format in _FormatToIterator: + iterator_generator = _FormatToIterator[format] + i = iterator_generator(fp, seq_count) + + elif format in SeqIO._FormatToIterator: + # Exploit the existing SeqIO parser to the dirty work! + i = _SeqIO_to_alignment_iterator(fp, format, seq_count=seq_count) + else: + raise ValueError("Unknown format '%s'" % format) + + yield from i + + +def read(handle, format, seq_count=None): + """Turn an alignment file into a single MultipleSeqAlignment object. + + Arguments: + - handle - handle to the file, or the filename as a string + (note older versions of Biopython only took a handle). + - format - string describing the file format. + - seq_count - Optional integer, number of sequences expected in each + alignment. Recommended for fasta format files. + + If the handle contains no alignments, or more than one alignment, + an exception is raised. 
For example, using a PFAM/Stockholm file + containing one alignment: + + >>> from Bio import AlignIO + >>> filename = "Clustalw/protein.aln" + >>> format = "clustal" + >>> alignment = AlignIO.read(filename, format) + >>> print("Alignment of length %i" % alignment.get_alignment_length()) + Alignment of length 411 + + If however you want the first alignment from a file containing + multiple alignments this function would raise an exception. + + >>> from Bio import AlignIO + >>> filename = "Emboss/needle.txt" + >>> format = "emboss" + >>> alignment = AlignIO.read(filename, format) + Traceback (most recent call last): + ... + ValueError: More than one record found in handle + + Instead use: + + >>> from Bio import AlignIO + >>> filename = "Emboss/needle.txt" + >>> format = "emboss" + >>> alignment = next(AlignIO.parse(filename, format)) + >>> print("First alignment has length %i" % alignment.get_alignment_length()) + First alignment has length 124 + + You must use the Bio.AlignIO.parse() function if you want to read multiple + records from the handle. + """ + iterator = parse(handle, format, seq_count) + try: + alignment = next(iterator) + except StopIteration: + raise ValueError("No records found in handle") from None + try: + next(iterator) + raise ValueError("More than one record found in handle") + except StopIteration: + pass + if seq_count: + if len(alignment) != seq_count: + raise RuntimeError( + "More sequences found in alignment than specified in seq_count: %s." + % seq_count + ) + return alignment + + +def convert(in_file, in_format, out_file, out_format, molecule_type=None): + """Convert between two alignment files, returns number of alignments. + + Arguments: + - in_file - an input handle or filename + - in_format - input file format, lower case string + - output - an output handle or filename + - out_file - output file format, lower case string + - molecule_type - optional molecule type to apply, string containing + "DNA", "RNA" or "protein". + + **NOTE** - If you provide an output filename, it will be opened which will + overwrite any existing file without warning. This may happen if even the + conversion is aborted (e.g. an invalid out_format name is given). + + Some output formats require the molecule type be specified where this + cannot be determined by the parser. For example, converting to FASTA, + Clustal, or PHYLIP format to NEXUS: + + >>> from io import StringIO + >>> from Bio import AlignIO + >>> handle = StringIO() + >>> AlignIO.convert("Phylip/horses.phy", "phylip", handle, "nexus", "DNA") + 1 + >>> print(handle.getvalue()) + #NEXUS + begin data; + dimensions ntax=10 nchar=40; + format datatype=dna missing=? gap=-; + matrix + Mesohippus AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + Hypohippus AAACCCCCCCAAAAAAAAACAAAAAAAAAAAAAAAAAAAA + Archaeohip CAAAAAAAAAAAAAAAACACAAAAAAAAAAAAAAAAAAAA + Parahippus CAAACAACAACAAAAAAAACAAAAAAAAAAAAAAAAAAAA + Merychippu CCAACCACCACCCCACACCCAAAAAAAAAAAAAAAAAAAA + 'M. 
secundu' CCAACCACCACCCACACCCCAAAAAAAAAAAAAAAAAAAA + Nannipus CCAACCACAACCCCACACCCAAAAAAAAAAAAAAAAAAAA + Neohippari CCAACCCCCCCCCCACACCCAAAAAAAAAAAAAAAAAAAA + Calippus CCAACCACAACCCACACCCCAAAAAAAAAAAAAAAAAAAA + Pliohippus CCCACCCCCCCCCACACCCCAAAAAAAAAAAAAAAAAAAA + ; + end; + + """ + if molecule_type: + if not isinstance(molecule_type, str): + raise TypeError("Molecule type should be a string, not %r" % molecule_type) + elif ( + "DNA" in molecule_type + or "RNA" in molecule_type + or "protein" in molecule_type + ): + pass + else: + raise ValueError("Unexpected molecule type, %r" % molecule_type) + + # TODO - Add optimised versions of important conversions + # For now just off load the work to SeqIO parse/write + # Don't open the output file until we've checked the input is OK: + alignments = parse(in_file, in_format, None) + + if molecule_type: + # Edit the records on the fly to set molecule type + + def over_ride(alignment): + """Over-ride molecule in-place.""" + for record in alignment: + record.annotations["molecule_type"] = molecule_type + return alignment + + alignments = (over_ride(_) for _ in alignments) + return write(alignments, out_file, out_format) + + +if __name__ == "__main__": + from Bio._utils import run_doctest + + run_doctest() diff --git a/code/lib/Bio/AlignIO/__pycache__/ClustalIO.cpython-37.pyc b/code/lib/Bio/AlignIO/__pycache__/ClustalIO.cpython-37.pyc new file mode 100644 index 0000000..967a616 Binary files /dev/null and b/code/lib/Bio/AlignIO/__pycache__/ClustalIO.cpython-37.pyc differ diff --git a/code/lib/Bio/AlignIO/__pycache__/EmbossIO.cpython-37.pyc b/code/lib/Bio/AlignIO/__pycache__/EmbossIO.cpython-37.pyc new file mode 100644 index 0000000..dc69b07 Binary files /dev/null and b/code/lib/Bio/AlignIO/__pycache__/EmbossIO.cpython-37.pyc differ diff --git a/code/lib/Bio/AlignIO/__pycache__/FastaIO.cpython-37.pyc b/code/lib/Bio/AlignIO/__pycache__/FastaIO.cpython-37.pyc new file mode 100644 index 0000000..590a863 Binary files /dev/null and b/code/lib/Bio/AlignIO/__pycache__/FastaIO.cpython-37.pyc differ diff --git a/code/lib/Bio/AlignIO/__pycache__/Interfaces.cpython-37.pyc b/code/lib/Bio/AlignIO/__pycache__/Interfaces.cpython-37.pyc new file mode 100644 index 0000000..50cee59 Binary files /dev/null and b/code/lib/Bio/AlignIO/__pycache__/Interfaces.cpython-37.pyc differ diff --git a/code/lib/Bio/AlignIO/__pycache__/MafIO.cpython-37.pyc b/code/lib/Bio/AlignIO/__pycache__/MafIO.cpython-37.pyc new file mode 100644 index 0000000..6495934 Binary files /dev/null and b/code/lib/Bio/AlignIO/__pycache__/MafIO.cpython-37.pyc differ diff --git a/code/lib/Bio/AlignIO/__pycache__/MauveIO.cpython-37.pyc b/code/lib/Bio/AlignIO/__pycache__/MauveIO.cpython-37.pyc new file mode 100644 index 0000000..9a01d82 Binary files /dev/null and b/code/lib/Bio/AlignIO/__pycache__/MauveIO.cpython-37.pyc differ diff --git a/code/lib/Bio/AlignIO/__pycache__/MsfIO.cpython-37.pyc b/code/lib/Bio/AlignIO/__pycache__/MsfIO.cpython-37.pyc new file mode 100644 index 0000000..41d6c6a Binary files /dev/null and b/code/lib/Bio/AlignIO/__pycache__/MsfIO.cpython-37.pyc differ diff --git a/code/lib/Bio/AlignIO/__pycache__/NexusIO.cpython-37.pyc b/code/lib/Bio/AlignIO/__pycache__/NexusIO.cpython-37.pyc new file mode 100644 index 0000000..7de4464 Binary files /dev/null and b/code/lib/Bio/AlignIO/__pycache__/NexusIO.cpython-37.pyc differ diff --git a/code/lib/Bio/AlignIO/__pycache__/PhylipIO.cpython-37.pyc b/code/lib/Bio/AlignIO/__pycache__/PhylipIO.cpython-37.pyc new file mode 100644 index 0000000..15268e3 Binary 
Binary files /dev/null and b/code/lib/Bio/AlignIO/__pycache__/PhylipIO.cpython-37.pyc differ
diff --git a/code/lib/Bio/AlignIO/__pycache__/StockholmIO.cpython-37.pyc b/code/lib/Bio/AlignIO/__pycache__/StockholmIO.cpython-37.pyc
new file mode 100644
index 0000000..e638dbf
Binary files /dev/null and b/code/lib/Bio/AlignIO/__pycache__/StockholmIO.cpython-37.pyc differ
diff --git a/code/lib/Bio/AlignIO/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/AlignIO/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000..cf0b44e
Binary files /dev/null and b/code/lib/Bio/AlignIO/__pycache__/__init__.cpython-37.pyc differ
diff --git a/code/lib/Bio/Alphabet/__init__.py b/code/lib/Bio/Alphabet/__init__.py
new file mode 100644
index 0000000..5109136
--- /dev/null
+++ b/code/lib/Bio/Alphabet/__init__.py
@@ -0,0 +1,22 @@
+# Copyright 2000-2002 by Andrew Dalke.
+# Revisions copyright 2007-2010 by Peter Cock.
+# All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Alphabets were previously used to declare sequence type and letters (OBSOLETE).
+
+The design of Bio.Alphabet included a number of historic design choices
+which, with the benefit of hindsight, were regrettable. Bio.Alphabet was
+therefore removed from Biopython in release 1.78. Instead, the molecule type is
+included as an annotation on SeqRecords where appropriate.
+
+Please see https://biopython.org/wiki/Alphabet for examples showing how to
+transition from Bio.Alphabet to molecule type annotations.
+"""
+
+raise ImportError(
+    "Bio.Alphabet has been removed from Biopython. In many cases, the alphabet can simply be ignored and removed from scripts. In a few cases, you may need to specify the ``molecule_type`` as an annotation on a SeqRecord for your script to work correctly. Please see https://biopython.org/wiki/Alphabet for more information."
+)
diff --git a/code/lib/Bio/Alphabet/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/Alphabet/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000..9ee4b01
Binary files /dev/null and b/code/lib/Bio/Alphabet/__pycache__/__init__.cpython-37.pyc differ
diff --git a/code/lib/Bio/Application/__init__.py b/code/lib/Bio/Application/__init__.py
new file mode 100644
index 0000000..f844d27
--- /dev/null
+++ b/code/lib/Bio/Application/__init__.py
@@ -0,0 +1,838 @@
+# Copyright 2001-2004 Brad Chapman.
+# Revisions copyright 2009-2013 by Peter Cock.
+# All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""General mechanisms to access applications in Biopython (OBSOLETE).
+
+This module is not intended for direct use. It provides the basic objects which
+are subclassed by our command line wrappers, such as:
+
+ - Bio.Align.Applications
+ - Bio.Blast.Applications
+ - Bio.Emboss.Applications
+ - Bio.Sequencing.Applications
+
+These modules provide wrapper classes for command line tools to help you
+construct command line strings by setting the values of each parameter.
+The finished command line strings are then normally invoked via the built-in
+Python module subprocess.
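+
+For example, a finished command line string might be run along these lines,
+where ``cline`` stands for any wrapper instance (such as the WaterCommandline
+example shown further below)::
+
+    import subprocess
+    result = subprocess.run(str(cline), shell=True,
+                            capture_output=True, text=True)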
+
+Due to the ongoing maintenance burden of keeping command line application
+wrappers up to date, we have decided to deprecate and eventually remove them.
+We instead now recommend building your command line and invoking it directly
+with the subprocess module.
+"""
+import os
+import platform
+import sys
+import subprocess
+import re
+
+
+# Use this regular expression to test the property names are going to
+# be valid as Python properties or arguments
+_re_prop_name = re.compile(r"^[a-zA-Z][a-zA-Z0-9_]*$")
+assert _re_prop_name.match("t")
+assert _re_prop_name.match("test")
+assert _re_prop_name.match("_test") is None  # we don't want private names
+assert _re_prop_name.match("-test") is None
+assert _re_prop_name.match("any-hyphen") is None
+assert _re_prop_name.match("underscore_ok")
+assert _re_prop_name.match("test_name")
+assert _re_prop_name.match("test2")
+# These are reserved names in Python itself,
+_reserved_names = [
+    "and",
+    "del",
+    "from",
+    "not",
+    "while",
+    "as",
+    "elif",
+    "global",
+    "or",
+    "with",
+    "assert",
+    "else",
+    "if",
+    "pass",
+    "yield",
+    "break",
+    "except",
+    "import",
+    "print",
+    "class",
+    "exec",
+    "in",
+    "raise",
+    "continue",
+    "finally",
+    "is",
+    "return",
+    "def",
+    "for",
+    "lambda",
+    "try",
+]
+# These are reserved names due to the way the wrappers work
+_local_reserved_names = ["set_parameter"]
+
+
+class ApplicationError(subprocess.CalledProcessError):
+    """Raised when an application returns a non-zero exit status (OBSOLETE).
+
+    The exit status will be stored in the returncode attribute, similarly
+    the command line string used in the cmd attribute, and (if captured)
+    stdout and stderr as strings.
+
+    This exception is a subclass of subprocess.CalledProcessError.
+
+    >>> err = ApplicationError(-11, "helloworld", "", "Some error text")
+    >>> err.returncode, err.cmd, err.stdout, err.stderr
+    (-11, 'helloworld', '', 'Some error text')
+    >>> print(err)
+    Non-zero return code -11 from 'helloworld', message 'Some error text'
+
+    """
+
+    def __init__(self, returncode, cmd, stdout="", stderr=""):
+        """Initialize the class."""
+        self.returncode = returncode
+        self.cmd = cmd
+        self.stdout = stdout
+        self.stderr = stderr
+
+    def __str__(self):
+        """Format the error as a string."""
+        # get first line of any stderr message
+        try:
+            msg = self.stderr.lstrip().split("\n", 1)[0].rstrip()
+        except Exception:  # TODO, ValueError? AttributeError?
+            msg = ""
+        if msg:
+            return "Non-zero return code %d from %r, message %r" % (
+                self.returncode,
+                self.cmd,
+                msg,
+            )
+        else:
+            return "Non-zero return code %d from %r" % (self.returncode, self.cmd)
+
+    def __repr__(self):
+        """Represent the error as a string."""
+        return "ApplicationError(%i, %s, %s, %s)" % (
+            self.returncode,
+            self.cmd,
+            self.stdout,
+            self.stderr,
+        )
+
+
+class AbstractCommandline:
+    r"""Generic interface for constructing command line strings (OBSOLETE).
+
+    This class shouldn't be called directly; it should be subclassed to
+    provide an implementation for a specific application.
+
+    For a usage example we'll show one of the EMBOSS wrappers. You can set
+    options when creating the wrapper object using keyword arguments - or
+    later using their corresponding properties:
+
+    >>> from Bio.Emboss.Applications import WaterCommandline
+    >>> cline = WaterCommandline(gapopen=10, gapextend=0.5)
+    >>> cline
+    WaterCommandline(cmd='water', gapopen=10, gapextend=0.5)
+
+    You can instead manipulate the parameters via their properties, e.g.
+ + >>> cline.gapopen + 10 + >>> cline.gapopen = 20 + >>> cline + WaterCommandline(cmd='water', gapopen=20, gapextend=0.5) + + You can clear a parameter you have already added by 'deleting' the + corresponding property: + + >>> del cline.gapopen + >>> cline.gapopen + >>> cline + WaterCommandline(cmd='water', gapextend=0.5) + + Once you have set the parameters you need, you can turn the object into + a string (e.g. to log the command): + + >>> str(cline) + Traceback (most recent call last): + ... + ValueError: You must either set outfile (output filename), or enable filter or stdout (output to stdout). + + In this case the wrapper knows certain arguments are required to construct + a valid command line for the tool. For a complete example, + + >>> from Bio.Emboss.Applications import WaterCommandline + >>> water_cmd = WaterCommandline(gapopen=10, gapextend=0.5) + >>> water_cmd.asequence = "asis:ACCCGGGCGCGGT" + >>> water_cmd.bsequence = "asis:ACCCGAGCGCGGT" + >>> water_cmd.outfile = "temp_water.txt" + >>> print(water_cmd) + water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5 + >>> water_cmd + WaterCommandline(cmd='water', outfile='temp_water.txt', asequence='asis:ACCCGGGCGCGGT', bsequence='asis:ACCCGAGCGCGGT', gapopen=10, gapextend=0.5) + + You would typically run the command line via a standard Python operating + system call using the subprocess module for full control. For the simple + case where you just want to run the command and get the output: + + stdout, stderr = water_cmd() + + Note that by default we assume the underlying tool is installed on the + system $PATH environment variable. This is normal under Linux/Unix, but + may need to be done manually under Windows. Alternatively, you can specify + the full path to the binary as the first argument (cmd): + + >>> from Bio.Emboss.Applications import WaterCommandline + >>> water_cmd = WaterCommandline(r"C:\Program Files\EMBOSS\water.exe", + ... gapopen=10, gapextend=0.5, + ... asequence="asis:ACCCGGGCGCGGT", + ... bsequence="asis:ACCCGAGCGCGGT", + ... outfile="temp_water.txt") + >>> print(water_cmd) + "C:\Program Files\EMBOSS\water.exe" -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5 + + Notice that since the path name includes a space it has automatically + been quoted. + + """ + + # TODO - Replace the above example since EMBOSS doesn't work properly + # if installed into a folder with a space like "C:\Program Files\EMBOSS" + # + # Note the call example above is not a doctest as we can't handle EMBOSS + # (or any other tool) being missing in the unit tests. + + parameters = None # will be a list defined in subclasses + + def __init__(self, cmd, **kwargs): + """Create a new instance of a command line wrapper object.""" + # Init method - should be subclassed! + # + # The subclass methods should look like this: + # + # def __init__(self, cmd="muscle", **kwargs): + # self.parameters = [...] + # AbstractCommandline.__init__(self, cmd, **kwargs) + # + # i.e. There should have an optional argument "cmd" to set the location + # of the executable (with a sensible default which should work if the + # command is on the path on Unix), and keyword arguments. It should + # then define a list of parameters, all objects derived from the base + # class _AbstractParameter. + # + # The keyword arguments should be any valid parameter name, and will + # be used to set the associated parameter. 
+ self.program_name = cmd + try: + parameters = self.parameters + except AttributeError: + raise AttributeError( + "Subclass should have defined self.parameters" + ) from None + # Create properties for each parameter at run time + aliases = set() + for p in parameters: + if not p.names: + if not isinstance(p, _StaticArgument): + raise TypeError("Expected %r to be of type _StaticArgument" % p) + continue + for name in p.names: + if name in aliases: + raise ValueError("Parameter alias %s multiply defined" % name) + aliases.add(name) + name = p.names[-1] + if _re_prop_name.match(name) is None: + raise ValueError( + "Final parameter name %r cannot be used as " + "an argument or property name in python" % name + ) + if name in _reserved_names: + raise ValueError( + "Final parameter name %r cannot be used as " + "an argument or property name because it is " + "a reserved word in python" % name + ) + if name in _local_reserved_names: + raise ValueError( + "Final parameter name %r cannot be used as " + "an argument or property name due to the " + "way the AbstractCommandline class works" % name + ) + + # Beware of binding-versus-assignment confusion issues + def getter(name): + return lambda x: x._get_parameter(name) + + def setter(name): + return lambda x, value: x.set_parameter(name, value) + + def deleter(name): + return lambda x: x._clear_parameter(name) + + doc = p.description + if isinstance(p, _Switch): + doc += ( + "\n\nThis property controls the addition of the %s " + "switch, treat this property as a boolean." % p.names[0] + ) + else: + doc += ( + "\n\nThis controls the addition of the %s parameter " + "and its associated value. Set this property to the " + "argument value required." % p.names[0] + ) + prop = property(getter(name), setter(name), deleter(name), doc) + setattr(self.__class__, name, prop) # magic! + for key, value in kwargs.items(): + self.set_parameter(key, value) + + def _validate(self): + """Make sure the required parameters have been set (PRIVATE). + + No return value - it either works or raises a ValueError. + + This is a separate method (called from __str__) so that subclasses may + override it. + """ + for p in self.parameters: + # Check for missing required parameters: + if p.is_required and not (p.is_set): + raise ValueError("Parameter %s is not set." % p.names[-1]) + # Also repeat the parameter validation here, just in case? + + def __str__(self): + """Make the commandline string with the currently set options. + + e.g. + + >>> from Bio.Emboss.Applications import WaterCommandline + >>> cline = WaterCommandline(gapopen=10, gapextend=0.5) + >>> cline.asequence = "asis:ACCCGGGCGCGGT" + >>> cline.bsequence = "asis:ACCCGAGCGCGGT" + >>> cline.outfile = "temp_water.txt" + >>> print(cline) + water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5 + >>> str(cline) + 'water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5' + """ + self._validate() + commandline = "%s " % _escape_filename(self.program_name) + for parameter in self.parameters: + if parameter.is_set: + # This will include a trailing space: + commandline += str(parameter) + return commandline.strip() # remove trailing space + + def __repr__(self): + """Return a representation of the command line object for debugging. + + e.g. 
+ + >>> from Bio.Emboss.Applications import WaterCommandline + >>> cline = WaterCommandline(gapopen=10, gapextend=0.5) + >>> cline.asequence = "asis:ACCCGGGCGCGGT" + >>> cline.bsequence = "asis:ACCCGAGCGCGGT" + >>> cline.outfile = "temp_water.txt" + >>> print(cline) + water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5 + >>> cline + WaterCommandline(cmd='water', outfile='temp_water.txt', asequence='asis:ACCCGGGCGCGGT', bsequence='asis:ACCCGAGCGCGGT', gapopen=10, gapextend=0.5) + """ + answer = "%s(cmd=%r" % (self.__class__.__name__, self.program_name) + for parameter in self.parameters: + if parameter.is_set: + if isinstance(parameter, _Switch): + answer += ", %s=True" % parameter.names[-1] + else: + answer += ", %s=%r" % (parameter.names[-1], parameter.value) + answer += ")" + return answer + + def _get_parameter(self, name): + """Get a commandline option value (PRIVATE).""" + for parameter in self.parameters: + if name in parameter.names: + if isinstance(parameter, _Switch): + return parameter.is_set + else: + return parameter.value + raise ValueError("Option name %s was not found." % name) + + def _clear_parameter(self, name): + """Reset or clear a commandline option value (PRIVATE).""" + cleared_option = False + for parameter in self.parameters: + if name in parameter.names: + parameter.value = None + parameter.is_set = False + cleared_option = True + if not cleared_option: + raise ValueError("Option name %s was not found." % name) + + def set_parameter(self, name, value=None): + """Set a commandline option for a program (OBSOLETE). + + Every parameter is available via a property and as a named + keyword when creating the instance. Using either of these is + preferred to this legacy set_parameter method which is now + OBSOLETE, and likely to be DEPRECATED and later REMOVED in + future releases. + """ + set_option = False + for parameter in self.parameters: + if name in parameter.names: + if isinstance(parameter, _Switch): + if value is None: + import warnings + + warnings.warn( + "For a switch type argument like %s, " + "we expect a boolean. None is treated " + "as FALSE!" % parameter.names[-1] + ) + parameter.is_set = bool(value) + set_option = True + else: + if value is not None: + self._check_value(value, name, parameter.checker_function) + parameter.value = value + parameter.is_set = True + set_option = True + if not set_option: + raise ValueError("Option name %s was not found." % name) + + def _check_value(self, value, name, check_function): + """Check whether the given value is valid (PRIVATE). + + No return value - it either works or raises a ValueError. + + This uses the passed function 'check_function', which can either + return a [0, 1] (bad, good) value or raise an error. Either way + this function will raise an error if the value is not valid, or + finish silently otherwise. + """ + if check_function is not None: + is_good = check_function(value) # May raise an exception + if is_good not in [0, 1, True, False]: + raise ValueError( + "Result of check_function: %r is of an unexpected value" % is_good + ) + if not is_good: + raise ValueError( + "Invalid parameter value %r for parameter %s" % (value, name) + ) + + def __setattr__(self, name, value): + """Set attribute name to value (PRIVATE). + + This code implements a workaround for a user interface issue. 
+        Without this __setattr__, attribute-based assignment of parameters
+        will silently accept invalid parameters, leading to known instances
+        of the user assuming that parameters for the application are set,
+        when they are not.
+
+        >>> from Bio.Emboss.Applications import WaterCommandline
+        >>> cline = WaterCommandline(gapopen=10, gapextend=0.5, stdout=True)
+        >>> cline.asequence = "a.fasta"
+        >>> cline.bsequence = "b.fasta"
+        >>> cline.csequence = "c.fasta"
+        Traceback (most recent call last):
+        ...
+        ValueError: Option name csequence was not found.
+        >>> print(cline)
+        water -stdout -asequence=a.fasta -bsequence=b.fasta -gapopen=10 -gapextend=0.5
+
+        This workaround uses a whitelist of object attributes, and sets the
+        object attribute list as normal, for these. Other attributes are
+        assumed to be parameters, and passed to the self.set_parameter method
+        for validation and assignment.
+        """
+        if name in ["parameters", "program_name"]:  # Allowed attributes
+            self.__dict__[name] = value
+        else:
+            self.set_parameter(name, value)  # treat as a parameter
+
+    def __call__(self, stdin=None, stdout=True, stderr=True, cwd=None, env=None):
+        """Execute command, wait for it to finish, return (stdout, stderr).
+
+        Runs the command line tool and waits for it to finish. If it returns
+        a non-zero error level, an exception is raised. Otherwise two strings
+        are returned containing stdout and stderr.
+
+        The optional stdin argument should be a string of data which will be
+        passed to the tool as standard input.
+
+        The optional stdout and stderr arguments may be filenames (string),
+        but otherwise are treated as booleans, and control whether the output
+        should be captured as strings (True, default), or ignored by sending
+        it to /dev/null to avoid wasting memory (False). If sent to a file
+        or ignored, then empty string(s) are returned.
+
+        The optional cwd argument is a string giving the working directory
+        to run the command from. See Python's subprocess module documentation
+        for more details.
+
+        The optional env argument is a dictionary setting the environment
+        variables to be used in the new process. By default the current
+        process' environment variables are used. See Python's subprocess
+        module documentation for more details.
+
+        Default example usage::
+
+            from Bio.Emboss.Applications import WaterCommandline
+            water_cmd = WaterCommandline(gapopen=10, gapextend=0.5,
+                                         stdout=True, auto=True,
+                                         asequence="a.fasta", bsequence="b.fasta")
+            print("About to run: %s" % water_cmd)
+            std_output, err_output = water_cmd()
+
+        This functionality is similar to subprocess.check_output(). In general
+        if you require more control over running the command, use subprocess
+        directly.
+
+        When the program called returns a non-zero error level, a custom
+        ApplicationError exception is raised. This includes any stdout and
+        stderr strings captured as attributes of the exception object, since
+        they may be useful for diagnosing what went wrong.
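+
+        As a small sketch of the file redirection described above (the input
+        files named here are assumed to exist)::
+
+            from Bio.Emboss.Applications import WaterCommandline
+            water_cmd = WaterCommandline(gapopen=10, gapextend=0.5, auto=True,
+                                         asequence="a.fasta", bsequence="b.fasta",
+                                         outfile="water.txt")
+            # stdout/stderr given as filenames are written to those files;
+            # empty strings are returned instead of the captured text:
+            out, err = water_cmd(stdout="tool.log", stderr="tool.err")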
+ """ + if not stdout: + stdout_arg = open(os.devnull, "w") + elif isinstance(stdout, str): + stdout_arg = open(stdout, "w") + else: + stdout_arg = subprocess.PIPE + + if not stderr: + stderr_arg = open(os.devnull, "w") + elif isinstance(stderr, str): + if stdout == stderr: + stderr_arg = stdout_arg # Write both to the same file + else: + stderr_arg = open(stderr, "w") + else: + stderr_arg = subprocess.PIPE + + # We may not need to supply any piped input, but we setup the + # standard input pipe anyway as a work around for a python + # bug if this is called from a Windows GUI program. For + # details, see http://bugs.python.org/issue1124861 + # + # Using universal newlines is important on Python 3, this + # gives unicode handles rather than bytes handles. + + # Windows 7, 8, 8.1 and 10 want shell = True + if sys.platform != "win32": + use_shell = True + else: + win_ver = platform.win32_ver()[0] + if win_ver in ["7", "8", "post2012Server", "10"]: + use_shell = True + else: + use_shell = False + child_process = subprocess.Popen( + str(self), + stdin=subprocess.PIPE, + stdout=stdout_arg, + stderr=stderr_arg, + universal_newlines=True, + cwd=cwd, + env=env, + shell=use_shell, + ) + # Use .communicate as can get deadlocks with .wait(), see Bug 2804 + stdout_str, stderr_str = child_process.communicate(stdin) + if not stdout: + assert not stdout_str, stdout_str + if not stderr: + assert not stderr_str, stderr_str + return_code = child_process.returncode + + # Particularly important to close handles on Jython and PyPy + # (where garbage collection is less predictable) and on Windows + # (where cannot delete files with an open handle): + if not stdout or isinstance(stdout, str): + # We opened /dev/null or a file + stdout_arg.close() + if not stderr or (isinstance(stderr, str) and stdout != stderr): + # We opened /dev/null or a file + stderr_arg.close() + + if return_code: + raise ApplicationError(return_code, str(self), stdout_str, stderr_str) + return stdout_str, stderr_str + + +class _AbstractParameter: + """A class to hold information about a parameter for a commandline. + + Do not use this directly, instead use one of the subclasses. + """ + + def __init__(self): + raise NotImplementedError + + def __str__(self): + raise NotImplementedError + + +class _Option(_AbstractParameter): + """Represent an option that can be set for a program. + + This holds UNIXish options like --append=yes and -a yes, + where a value (here "yes") is generally expected. + + For UNIXish options like -kimura in clustalw which don't + take a value, use the _Switch object instead. + + Attributes: + - names -- a list of string names (typically two entries) by which + the parameter can be set via the legacy set_parameter method + (eg ["-a", "--append", "append"]). The first name in list is used + when building the command line. The last name in the list is a + "human readable" name describing the option in one word. This + must be a valid Python identifier as it is used as the property + name and as a keyword argument, and should therefore follow PEP8 + naming. + - description -- a description of the option. This is used as + the property docstring. + - filename -- True if this argument is a filename (or other argument + that should be quoted) and should be automatically quoted if it + contains spaces. + - checker_function -- a reference to a function that will determine + if a given value is valid for this parameter. 
+       if a given value is valid for this parameter. This function can either
+       raise an error when given a bad value, or return a [0, 1] decision on
+       whether the value is correct.
+     - equate -- should an equals sign be inserted if a value is used?
+     - is_required -- a flag to indicate if the parameter must be set for
+       the program to be run.
+     - is_set -- if the parameter has been set
+     - value -- the value of a parameter
+
+    """
+
+    def __init__(
+        self,
+        names,
+        description,
+        filename=False,
+        checker_function=None,
+        is_required=False,
+        equate=True,
+    ):
+        self.names = names
+        if not isinstance(description, str):
+            raise TypeError("Should be a string: %r for %s" % (description, names[-1]))
+        # Note 'filename' is for any string with spaces that needs quoting
+        self.is_filename = filename
+        self.checker_function = checker_function
+        self.description = description
+        self.equate = equate
+        self.is_required = is_required
+
+        self.is_set = False
+        self.value = None
+
+    def __str__(self):
+        """Return the value of this option for the commandline.
+
+        Includes a trailing space.
+        """
+        # Note: Before equate was handled explicitly, the old
+        # code would do either "--name " or "--name=value ",
+        # or " -name " or " -name value ". This choice is now
+        # made explicitly when setting up the option.
+        if self.value is None:
+            return "%s " % self.names[0]
+        if self.is_filename:
+            v = _escape_filename(self.value)
+        else:
+            v = str(self.value)
+        if self.equate:
+            return "%s=%s " % (self.names[0], v)
+        else:
+            return "%s %s " % (self.names[0], v)
+
+
+class _Switch(_AbstractParameter):
+    """Represent an optional argument switch for a program.
+
+    This holds UNIXish options like -kimura in clustalw which don't
+    take a value, they are either included in the command string
+    or omitted.
+
+    Attributes:
+     - names -- a list of string names (typically two entries) by which
+       the parameter can be set via the legacy set_parameter method
+       (eg ["-a", "--append", "append"]). The first name in the list is
+       used when building the command line. The last name in the list is
+       a "human readable" name describing the option in one word. This
+       must be a valid Python identifier as it is used as the property
+       name and as a keyword argument, and should therefore follow PEP8
+       naming.
+     - description -- a description of the option. This is used as
+       the property docstring.
+     - is_set -- if the parameter has been set
+
+    NOTE - There is no value attribute, see is_set instead.
+
+    """
+
+    def __init__(self, names, description):
+        self.names = names
+        self.description = description
+        self.is_set = False
+        self.is_required = False
+
+    def __str__(self):
+        """Return the value of this option for the commandline.
+
+        Includes a trailing space.
+        """
+        assert not hasattr(self, "value")
+        if self.is_set:
+            return "%s " % self.names[0]
+        else:
+            return ""
+
+
+class _Argument(_AbstractParameter):
+    """Represent an argument on a commandline.
+
+    The names argument should be a list containing one string.
+    This must be a valid Python identifier as it is used as the
+    property name and as a keyword argument, and should therefore
+    follow PEP8 naming.
+ """ + + def __init__( + self, + names, + description, + filename=False, + checker_function=None, + is_required=False, + ): + # if len(names) != 1: + # raise ValueError("The names argument to _Argument should be a " + # "single entry list with a PEP8 property name.") + self.names = names + if not isinstance(description, str): + raise TypeError("Should be a string: %r for %s" % (description, names[-1])) + # Note 'filename' is for any string with spaces that needs quoting + self.is_filename = filename + self.checker_function = checker_function + self.description = description + self.is_required = is_required + self.is_set = False + self.value = None + + def __str__(self): + if self.value is None: + return " " + elif self.is_filename: + return "%s " % _escape_filename(self.value) + else: + return "%s " % self.value + + +class _ArgumentList(_Argument): + """Represent a variable list of arguments on a command line, e.g. multiple filenames.""" + + # TODO - Option to require at least one value? e.g. min/max count? + + def __str__(self): + if not isinstance(self.value, list): + raise TypeError("Arguments should be a list") + if not self.value: + raise ValueError("Requires at least one filename") + # A trailing space is required so that parameters following the last filename + # do not appear merged. + # e.g.: samtools cat in1.bam in2.bam-o out.sam [without trailing space][Incorrect] + # samtools cat in1.bam in2.bam -o out.sam [with trailing space][Correct] + if self.is_filename: + return " ".join(_escape_filename(v) for v in self.value) + " " + else: + return " ".join(self.value) + " " + + +class _StaticArgument(_AbstractParameter): + """Represent a static (read only) argument on a commandline. + + This is not intended to be exposed as a named argument or + property of a command line wrapper object. + """ + + def __init__(self, value): + self.names = [] + self.is_required = False + self.is_set = True + self.value = value + + def __str__(self): + return "%s " % self.value + + +def _escape_filename(filename): + """Escape filenames with spaces by adding quotes (PRIVATE). + + Note this will not add quotes if they are already included: + + >>> print((_escape_filename('example with spaces'))) + "example with spaces" + >>> print((_escape_filename('"example with spaces"'))) + "example with spaces" + >>> print((_escape_filename(1))) + 1 + + Note the function is more generic than the name suggests, since it + is used to add quotes around any string arguments containing spaces. + """ + # Is adding the following helpful + # if os.path.isfile(filename): + # # On Windows, if the file exists, we can ask for + # # its alternative short name (DOS style 8.3 format) + # # which has no spaces in it. Note that this name + # # is not portable between machines, or even folder! 
+            # try:
+            #     import win32api
+            #     short = win32api.GetShortPathName(filename)
+            #     assert os.path.isfile(short)
+            #     return short
+            # except ImportError:
+            #     pass
+    if not isinstance(filename, str):
+        # for example the NCBI BLAST+ -outfmt argument can be an integer
+        return filename
+    if " " not in filename:
+        return filename
+    # We'll just quote it - works on Windows, Mac OS X etc
+    if filename.startswith('"') and filename.endswith('"'):
+        # It's already quoted
+        return filename
+    else:
+        return '"%s"' % filename
+
+
+def _test():
+    """Run the Bio.Application module's doctests (PRIVATE)."""
+    import doctest
+
+    doctest.testmod(verbose=1)
+
+
+if __name__ == "__main__":
+    # Run the doctests
+    _test()
diff --git a/code/lib/Bio/Application/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/Application/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000..ee141eb
Binary files /dev/null and b/code/lib/Bio/Application/__pycache__/__init__.cpython-37.pyc differ
diff --git a/code/lib/Bio/Blast/Applications.py b/code/lib/Bio/Blast/Applications.py
new file mode 100644
index 0000000..954a254
--- /dev/null
+++ b/code/lib/Bio/Blast/Applications.py
@@ -0,0 +1,1602 @@
+# Copyright 2001 Brad Chapman.
+# Revisions copyright 2009-2010 by Peter Cock.
+# Revisions copyright 2010 by Phillip Garland.
+# All rights reserved.
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Definitions for interacting with BLAST related applications (OBSOLETE).
+
+Wrappers for the new NCBI BLAST+ tools (written in C++):
+
+ - NcbiblastpCommandline - Protein-Protein BLAST
+ - NcbiblastnCommandline - Nucleotide-Nucleotide BLAST
+ - NcbiblastxCommandline - Translated Query-Protein Subject BLAST
+ - NcbitblastnCommandline - Protein Query-Translated Subject BLAST
+ - NcbitblastxCommandline - Translated Query-Translated Subject BLAST
+ - NcbipsiblastCommandline - Position-Specific Initiated BLAST
+ - NcbirpsblastCommandline - Reverse Position Specific BLAST
+ - NcbirpstblastnCommandline - Translated Reverse Position Specific BLAST
+ - NcbideltablastCommandline - Protein-Protein domain enhanced lookup time accelerated blast
+ - NcbiblastformatterCommandline - Convert ASN.1 to other BLAST output formats
+ - NcbimakeblastdbCommandline - Application to create BLAST databases
+
+For further details, see:
+
+Camacho et al. BLAST+: architecture and applications
+BMC Bioinformatics 2009, 10:421
+https://doi.org/10.1186/1471-2105-10-421
+
+We have decided to remove this module in the future, and instead recommend
+building your command and invoking it via the subprocess module directly.
+"""
+
+from Bio.Application import _Option, AbstractCommandline, _Switch
+
+
+class _NcbibaseblastCommandline(AbstractCommandline):
+    """Base Commandline object for (new) NCBI BLAST+ wrappers (PRIVATE).
+
+    This is provided for subclassing, it deals with shared options
+    common to all the BLAST tools (blastn, rpsblast, rpstblastn, etc.
+    AND blast_formatter).
+    """
+
+    def __init__(self, cmd=None, **kwargs):
+        assert cmd is not None
+        extra_parameters = [
+            # Core:
+            _Switch(
+                ["-h", "h"], "Print USAGE and DESCRIPTION; ignore other arguments."
+            ),
+            _Switch(
+                ["-help", "help"],
+                "Print USAGE, DESCRIPTION and ARGUMENTS description; "
+                "ignore other arguments.",
+            ),
+            _Switch(
+                ["-version", "version"],
+                "Print version number; ignore other arguments.",
+            ),
+            # Output configuration options
+            _Option(
+                ["-out", "out"],
+                "Output file for alignment.",
+                filename=True,
+                equate=False,
+            ),
+            # Formatting options:
+            _Option(
+                ["-outfmt", "outfmt"],
+                "Alignment view. Typically an integer 0-14 but for some "
+                "formats can be named columns like '6 qseqid sseqid'. "
+                "Use 5 for XML output (differs from classic BLAST which "
+                "used 7 for XML).",
+                filename=True,  # to ensure spaced inputs are quoted
+                equate=False,
+            ),
+            # TODO - Document and test the column options
+            _Switch(["-show_gis", "show_gis"], "Show NCBI GIs in deflines?"),
+            _Option(
+                ["-num_descriptions", "num_descriptions"],
+                "Number of database sequences to show one-line descriptions for.\n\n"
+                "Integer argument (at least zero). Default is 500. "
+                "See also num_alignments.",
+                equate=False,
+            ),
+            _Option(
+                ["-num_alignments", "num_alignments"],
+                "Number of database sequences to show alignments for.\n\n"
+                "Integer argument (at least zero). Default is 250. "
+                "See also num_descriptions.",
+                equate=False,
+            ),
+            _Option(
+                ["-line_length", "line_length"],
+                "Line length for formatting alignments "
+                "(integer, at least 1, default 60).\n\n"
+                "Not applicable for outfmt > 4. Added in BLAST+ 2.2.30.",
+                equate=False,
+            ),
+            _Switch(
+                ["-html", "html"], "Produce HTML output? See also the outfmt option."
+            ),
+            # Miscellaneous options
+            _Switch(
+                ["-parse_deflines", "parse_deflines"],
+                "Should the query and subject defline(s) be parsed?",
+            ),
+        ]
+        try:
+            # Insert extra parameters - at the start just in case there
+            # are any arguments which must come last:
+            self.parameters = extra_parameters + self.parameters
+        except AttributeError:
+            # Should we raise an error? The subclass should have set this up!
+            self.parameters = extra_parameters
+        AbstractCommandline.__init__(self, cmd, **kwargs)
+
+    def _validate_incompatibilities(self, incompatibles):
+        """Validate parameters for incompatibilities (PRIVATE).
+
+        Used by the _validate method.
+        """
+        for a in incompatibles:
+            if self._get_parameter(a):
+                for b in incompatibles[a]:
+                    if self._get_parameter(b):
+                        raise ValueError("Options %s and %s are incompatible." % (a, b))
+
+
+class _NcbiblastCommandline(_NcbibaseblastCommandline):
+    """Base Commandline object for (new) NCBI BLAST+ wrappers (PRIVATE).
+
+    This is provided for subclassing, it deals with shared options
+    common to all the BLAST tools (blastn, rpsblast, rpstblastn, etc).
+    """
+
+    def __init__(self, cmd=None, **kwargs):
+        assert cmd is not None
+        extra_parameters = [
+            # Input query options:
+            _Option(
+                ["-query", "query"],
+                "The sequence to search with.",
+                filename=True,
+                equate=False,
+            ),  # Should this be required?
+            _Option(
+                ["-query_loc", "query_loc"],
+                "Location on the query sequence (Format: start-stop).",
+                equate=False,
+            ),
+            # General search options:
+            _Option(["-db", "db"], "The database to BLAST against.", equate=False),
+            _Option(["-evalue", "evalue"], "Expectation value cutoff.", equate=False),
+            _Option(
+                ["-word_size", "word_size"],
+                "Word size for wordfinder algorithm.\n\nInteger. Minimum 2.",
+                equate=False,
+            ),
+            # BLAST-2-Sequences options:
+            # - see subclass
+            # Formatting options:
+            # - see baseclass
+            # Query filtering options
+            _Option(
+                ["-soft_masking", "soft_masking"],
+                "Apply filtering locations as soft masks (Boolean, Default = true).",
+                equate=False,
+            ),
+            _Switch(
+                ["-lcase_masking", "lcase_masking"],
+                "Use lower case filtering in query and subject sequence(s)?",
+            ),
+            # Restrict search or results
+            _Option(
+                ["-gilist", "gilist"],
+                "Restrict search of database to list of GI's.\n\n"
+                "Incompatible with: negative_gilist, seqidlist, negative_seqidlist, "
+                "remote, subject, subject_loc",
+                filename=True,
+                equate=False,
+            ),
+            _Option(
+                ["-negative_gilist", "negative_gilist"],
+                "Restrict search of database to everything except the listed GIs.\n\n"
+                "Incompatible with: gilist, seqidlist, remote, subject, subject_loc",
+                filename=True,
+                equate=False,
+            ),
+            _Option(
+                ["-seqidlist", "seqidlist"],
+                "Restrict search of database to list of SeqID's.\n\n"
+                "Incompatible with: gilist, negative_gilist, remote, subject, "
+                "subject_loc",
+                filename=True,
+                equate=False,
+            ),
+            _Option(
+                ["-negative_seqidlist", "negative_seqidlist"],
+                "Restrict search of database to everything except listed SeqID's.\n\n"
+                "Incompatible with: gilist, seqidlist, remote, subject, subject_loc",
+                filename=True,
+                equate=False,
+            ),
+            _Option(
+                ["-entrez_query", "entrez_query"],
+                "Restrict search with the given Entrez query (requires remote).",
+                equate=False,
+            ),
+            _Option(
+                ["-qcov_hsp_perc", "qcov_hsp_perc"],
+                "Percent query coverage per hsp (float, 0 to 100).\n\n"
+                "Added in BLAST+ 2.2.30.",
+                equate=False,
+            ),
+            _Option(
+                ["-max_target_seqs", "max_target_seqs"],
+                "Maximum number of aligned sequences to keep (integer, at least one).",
+                equate=False,
+            ),
+            # Statistical options
+            _Option(
+                ["-dbsize", "dbsize"],
+                "Effective length of the database (integer).",
+                equate=False,
+            ),
+            _Option(
+                ["-searchsp", "searchsp"],
+                "Effective length of the search space (integer).",
+                equate=False,
+            ),
+            _Option(
+                ["-max_hsps_per_subject", "max_hsps_per_subject"],
+                "Override max number of HSPs per subject saved for ungapped searches "
+                "(integer).",
+                equate=False,
+            ),
+            _Option(
+                ["-max_hsps", "max_hsps"],
+                "Set max number of HSPs saved per subject sequence.\n\n"
+                "Default 0 means no limit.",
+                equate=False,
+            ),
+            _Switch(["-sum_statistics", "sum_statistics"], "Use sum statistics."),
+            # Is -sum_stats a BLAST+ bug, why not use -sum_statistics switch?
+ _Option( + ["-sum_stats", "sum_stats"], + "Use sum statistics (boolean).\n\nAdded in BLAST+ 2.2.30.", + equate=False, + ), + # Extension options + _Option( + ["-xdrop_ungap", "xdrop_ungap"], + "X-dropoff value (in bits) for ungapped extensions (float).", + equate=False, + ), + _Option( + ["-xdrop_gap", "xdrop_gap"], + "X-dropoff value (in bits) for preliminary gapped extensions (float).", + equate=False, + ), + _Option( + ["-xdrop_gap_final", "xdrop_gap_final"], + "X-dropoff value (in bits) for final gapped alignment (float).", + equate=False, + ), + _Option( + ["-window_size", "window_size"], + "Multiple hits window size, use 0 to specify 1-hit algorithm " + "(integer).", + equate=False, + ), + # Search strategy options + _Option( + ["-import_search_strategy", "import_search_strategy"], + "Search strategy to use.\n\n" + "Incompatible with: export_search_strategy", + filename=True, + equate=False, + ), + _Option( + ["-export_search_strategy", "export_search_strategy"], + "File name to record the search strategy used.\n\n" + "Incompatible with: import_search_strategy", + filename=True, + equate=False, + ), + # Miscellaneous options + _Option( + ["-num_threads", "num_threads"], + "Number of threads to use in the BLAST search.\n\n" + "Integer, at least one. Default is one. Incompatible with: remote", + equate=False, + ), + _Switch( + ["-remote", "remote"], + "Execute search remotely?\n\n" + "Incompatible with: gilist, negative_gilist, subject_loc, " + "num_threads, ...", + ), + ] + try: + # Insert extra parameters - at the start just in case there + # are any arguments which must come last: + self.parameters = extra_parameters + self.parameters + except AttributeError: + # Should we raise an error? The subclass should have set this up! + self.parameters = extra_parameters + _NcbibaseblastCommandline.__init__(self, cmd, **kwargs) + + def _validate(self): + incompatibles = { + "remote": ["gilist", "negative_gilist", "num_threads"], + "import_search_strategy": ["export_search_strategy"], + "gilist": ["negative_gilist"], + "seqidlist": ["gilist", "negative_gilist", "remote"], + } + self._validate_incompatibilities(incompatibles) + if self.entrez_query and not self.remote: + raise ValueError("Option entrez_query requires remote option.") + AbstractCommandline._validate(self) + + +class _Ncbiblast2SeqCommandline(_NcbiblastCommandline): + """Base Commandline object for (new) NCBI BLAST+ wrappers (PRIVATE). + + This is provided for subclassing, it deals with shared options + common to all the BLAST tools supporting two-sequence BLAST + (blastn, psiblast, etc) but not rpsblast or rpstblastn. 
+ """ + + def __init__(self, cmd=None, **kwargs): + assert cmd is not None + extra_parameters = [ + # General search options: + _Option( + ["-gapopen", "gapopen"], "Cost to open a gap (integer).", equate=False + ), + _Option( + ["-gapextend", "gapextend"], + "Cost to extend a gap (integer).", + equate=False, + ), + # BLAST-2-Sequences options: + _Option( + ["-subject", "subject"], + "Subject sequence(s) to search.\n\n" + "Incompatible with: db, gilist, seqidlist, negative_gilist, " + "negative_seqidlist, db_soft_mask, db_hard_mask\n\n" + "See also subject_loc.", + filename=True, + equate=False, + ), + _Option( + ["-subject_loc", "subject_loc"], + "Location on the subject sequence (Format: start-stop).\n\n" + "Incompatible with: db, gilist, seqidlist, negative_gilist, " + "negative_seqidlist, db_soft_mask, db_hard_mask, remote.\n\n" + "See also subject.", + equate=False, + ), + # Restrict search or results: + _Option( + ["-culling_limit", "culling_limit"], + "Hit culling limit (integer).\n\n" + "If the query range of a hit is enveloped by that of at " + "least this many higher-scoring hits, delete the hit.\n\n" + "Incompatible with: best_hit_overhang, best_hit_score_edge.", + equate=False, + ), + _Option( + ["-best_hit_overhang", "best_hit_overhang"], + "Best Hit algorithm overhang value (float, recommended value: 0.1)\n\n" + "Float between 0.0 and 0.5 inclusive. " + "Incompatible with: culling_limit.", + equate=False, + ), + _Option( + ["-best_hit_score_edge", "best_hit_score_edge"], + "Best Hit algorithm score edge value (float).\n\n" + "Float between 0.0 and 0.5 inclusive. Recommended value: 0.1\n\n" + "Incompatible with: culling_limit.", + equate=False, + ), + ] + try: + # Insert extra parameters - at the start just in case there + # are any arguments which must come last: + self.parameters = extra_parameters + self.parameters + except AttributeError: + # Should we raise an error? The subclass should have set this up! + self.parameters = extra_parameters + _NcbiblastCommandline.__init__(self, cmd, **kwargs) + + def _validate(self): + incompatibles = { + "subject_loc": ["db", "gilist", "negative_gilist", "seqidlist", "remote"], + "culling_limit": ["best_hit_overhang", "best_hit_score_edge"], + "subject": ["db", "gilist", "negative_gilist", "seqidlist"], + } + self._validate_incompatibilities(incompatibles) + _NcbiblastCommandline._validate(self) + + +class _NcbiblastMain2SeqCommandline(_Ncbiblast2SeqCommandline): + """Base Commandline object for (new) NCBI BLAST+ wrappers (PRIVATE). + + This is provided for subclassing, it deals with shared options + common to the main BLAST tools blastp, blastn, blastx, tblastx, tblastn + but not psiblast, rpsblast or rpstblastn. + """ + + def __init__(self, cmd=None, **kwargs): + assert cmd is not None + extra_parameters = [ + # Restrict search or results: + _Option( + ["-db_soft_mask", "db_soft_mask"], + "Filtering algorithm for soft masking (integer).\n\n" + "Filtering algorithm ID to apply to BLAST database as soft masking. " + "Incompatible with: db_hard_mask, subject, subject_loc", + equate=False, + ), + _Option( + ["-db_hard_mask", "db_hard_mask"], + "Filtering algorithm for hard masking (integer).\n\n" + "Filtering algorithm ID to apply to BLAST database as hard masking. 
" + "Incompatible with: db_soft_mask, subject, subject_loc", + equate=False, + ), + ] + try: + # Insert extra parameters - at the start just in case there + # are any arguments which must come last: + self.parameters = extra_parameters + self.parameters + except AttributeError: + # Should we raise an error? The subclass should have set this up! + self.parameters = extra_parameters + _Ncbiblast2SeqCommandline.__init__(self, cmd, **kwargs) + + def _validate(self): + incompatibles = { + "db_soft_mask": ["db_hard_mask", "subject", "subject_loc"], + "db_hard_mask": ["db_soft_mask", "subject", "subject_loc"], + } + self._validate_incompatibilities(incompatibles) + _Ncbiblast2SeqCommandline._validate(self) + + +class NcbiblastpCommandline(_NcbiblastMain2SeqCommandline): + """Create a commandline for the NCBI BLAST+ program blastp (for proteins). + + With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI + replaced the old blastall tool with separate tools for each of the searches. + This wrapper therefore replaces BlastallCommandline with option -p blastp. + + >>> from Bio.Blast.Applications import NcbiblastpCommandline + >>> cline = NcbiblastpCommandline(query="rosemary.pro", db="nr", + ... evalue=0.001, remote=True, ungapped=True) + >>> cline + NcbiblastpCommandline(cmd='blastp', query='rosemary.pro', db='nr', evalue=0.001, remote=True, ungapped=True) + >>> print(cline) + blastp -query rosemary.pro -db nr -evalue 0.001 -remote -ungapped + + You would typically run the command line with cline() or via the Python + subprocess module, as described in the Biopython tutorial. + """ + + def __init__(self, cmd="blastp", **kwargs): + """Initialize the class.""" + self.parameters = [ + # General search options: + _Option( + ["-task", "task"], + "Task to execute (string, blastp (default), blastp-fast or blastp-short).", + checker_function=lambda value: value + in ["blastp", "blastp-fast", "blastp-short"], + equate=False, + ), + _Option(["-matrix", "matrix"], "Scoring matrix name (default BLOSUM62)."), + _Option( + ["-threshold", "threshold"], + "Minimum score for words to be added to the BLAST lookup table (float).", + equate=False, + ), + _Option( + ["-comp_based_stats", "comp_based_stats"], + "Use composition-based statistics (string, default 2, i.e. True).\n\n" + "0, F or f: no composition-based statistics\n\n" + "2, T or t, D or d : Composition-based score adjustment as in " + "Bioinformatics 21:902-911, 2005, conditioned on sequence " + "properties\n\n" + "Note that tblastn also supports values of 1 and 3.", + checker_function=lambda value: value in "0Ft2TtDd", + equate=False, + ), + # Query filtering options: + _Option( + ["-seg", "seg"], + "Filter query sequence with SEG (string).\n\n" + 'Format: "yes", "window locut hicut", or "no" to disable\n' + 'Default is "12 2.2 2.5"', + equate=False, + ), + # Extension options: + _Switch(["-ungapped", "ungapped"], "Perform ungapped alignment only?"), + # Miscellaneous options: + _Switch( + ["-use_sw_tback", "use_sw_tback"], + "Compute locally optimal Smith-Waterman alignments?", + ), + ] + _NcbiblastMain2SeqCommandline.__init__(self, cmd, **kwargs) + + +class NcbiblastnCommandline(_NcbiblastMain2SeqCommandline): + """Wrapper for the NCBI BLAST+ program blastn (for nucleotides). + + With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI + replaced the old blastall tool with separate tools for each of the searches. + This wrapper therefore replaces BlastallCommandline with option -p blastn. 
+
+    For example, to run a search against the "nt" nucleotide database using the
+    FASTA nucleotide file "m_cold.fasta" as the query, with an expectation value
+    cut off of 0.001, saving the output to a file in XML format:
+
+    >>> from Bio.Blast.Applications import NcbiblastnCommandline
+    >>> cline = NcbiblastnCommandline(query="m_cold.fasta", db="nt", strand="plus",
+    ...                               evalue=0.001, out="m_cold.xml", outfmt=5)
+    >>> cline
+    NcbiblastnCommandline(cmd='blastn', out='m_cold.xml', outfmt=5, query='m_cold.fasta', db='nt', evalue=0.001, strand='plus')
+    >>> print(cline)
+    blastn -out m_cold.xml -outfmt 5 -query m_cold.fasta -db nt -evalue 0.001 -strand plus
+
+    You would typically run the command line with cline() or via the Python
+    subprocess module, as described in the Biopython tutorial.
+    """
+
+    def __init__(self, cmd="blastn", **kwargs):
+        """Initialize the class."""
+        self.parameters = [
+            # Input query options:
+            _Option(
+                ["-strand", "strand"],
+                "Query strand(s) to search against database/subject.\n\n"
+                'Values allowed are "both" (default), "minus", "plus".',
+                checker_function=lambda value: value in ["both", "minus", "plus"],
+                equate=False,
+            ),
+            # General search options:
+            _Option(
+                ["-task", "task"],
+                "Task to execute (string, default 'megablast')\n\n"
+                "Allowed values 'blastn', 'blastn-short', 'dc-megablast', 'megablast' "
+                "(the default), or 'vecscreen'.",
+                checker_function=lambda value: value
+                in ["blastn", "blastn-short", "dc-megablast", "megablast", "vecscreen"],
+                equate=False,
+            ),
+            _Option(
+                ["-penalty", "penalty"],
+                "Penalty for a nucleotide mismatch (integer, at most zero).",
+                equate=False,
+            ),
+            _Option(
+                ["-reward", "reward"],
+                "Reward for a nucleotide match (integer, at least zero).",
+                equate=False,
+            ),
+            _Option(
+                ["-use_index", "use_index"],
+                "Use MegaBLAST database index (Boolean, Default = False)",
+                equate=False,
+            ),
+            _Option(
+                ["-index_name", "index_name"],
+                "MegaBLAST database index name.",
+                equate=False,
+            ),
+            # Query filtering options:
+            _Option(
+                ["-dust", "dust"],
+                "Filter query sequence with DUST (string).\n\n"
+                "Format: 'yes', 'level window linker', or 'no' to disable.\n\n"
+                "Default = '20 64 1'.",
+                equate=False,
+            ),
+            _Option(
+                ["-filtering_db", "filtering_db"],
+                "BLAST database containing filtering elements (i.e. repeats).",
+                equate=False,
+            ),
+            _Option(
+                ["-window_masker_taxid", "window_masker_taxid"],
+                "Enable WindowMasker filtering using a Taxonomic ID (integer).",
+                equate=False,
+            ),
+            _Option(
+                ["-window_masker_db", "window_masker_db"],
+                "Enable WindowMasker filtering using this repeats database (string).",
+                equate=False,
+            ),
+            # Restrict search or results:
+            _Option(
+                ["-perc_identity", "perc_identity"],
+                "Percent identity (real, 0 to 100 inclusive).",
+                equate=False,
+            ),
+            # Discontiguous MegaBLAST options
+            _Option(
+                ["-template_type", "template_type"],
+                "Discontiguous MegaBLAST template type (string).\n\n"
+                "Allowed values: 'coding', 'coding_and_optimal' or 'optimal'.\n\n"
+                "Requires: template_length.",
+                checker_function=lambda value: value
+                in ["coding", "coding_and_optimal", "optimal"],
+                equate=False,
+            ),
+            _Option(
+                ["-template_length", "template_length"],
+                "Discontiguous MegaBLAST template length (integer).\n\n"
+                "Allowed values: 16, 18, 21.\n\n"
+                "Requires: template_type.",
+                checker_function=lambda value: value in [16, 18, 21, "16", "18", "21"],
+                equate=False,
+            ),
+            # Extension options:
+            _Switch(
+                ["-no_greedy", "no_greedy"],
+                "Use non-greedy dynamic programming extension",
+            ),
+            _Option(
+                ["-min_raw_gapped_score", "min_raw_gapped_score"],
+                "Minimum raw gapped score to keep an alignment in the "
+                "preliminary gapped and traceback stages (integer).",
+                equate=False,
+            ),
+            _Switch(["-ungapped", "ungapped"], "Perform ungapped alignment only?"),
+            _Option(
+                ["-off_diagonal_range", "off_diagonal_range"],
+                "Number of off-diagonals to search for the 2nd hit (integer).\n\n"
+                "Expects a positive integer, or 0 (default) to turn off.\n\n"
+                "Added in BLAST 2.2.23+.",
+                equate=False,
+            ),
+        ]
+        _NcbiblastMain2SeqCommandline.__init__(self, cmd, **kwargs)
+
+    def _validate(self):
+        if (self.template_type and not self.template_length) or (
+            self.template_length and not self.template_type
+        ):
+            raise ValueError(
+                "Options template_type and template_length require each other."
+            )
+        _NcbiblastMain2SeqCommandline._validate(self)
+
+
+class NcbiblastxCommandline(_NcbiblastMain2SeqCommandline):
+    """Wrapper for the NCBI BLAST+ program blastx (nucleotide query, protein database).
+
+    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
+    replaced the old blastall tool with separate tools for each of the searches.
+    This wrapper therefore replaces BlastallCommandline with option -p blastx.
+
+    >>> from Bio.Blast.Applications import NcbiblastxCommandline
+    >>> cline = NcbiblastxCommandline(query="m_cold.fasta", db="nr", evalue=0.001)
+    >>> cline
+    NcbiblastxCommandline(cmd='blastx', query='m_cold.fasta', db='nr', evalue=0.001)
+    >>> print(cline)
+    blastx -query m_cold.fasta -db nr -evalue 0.001
+
+    You would typically run the command line with cline() or via the Python
+    subprocess module, as described in the Biopython tutorial.
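+
+    Since these wrappers are obsolete, the equivalent search can also be run
+    by building the argument list yourself and calling Python's subprocess
+    module directly (a minimal sketch, reusing the query and database names
+    from the example above and assuming blastx is on the PATH)::
+
+        import subprocess
+        result = subprocess.run(
+            ["blastx", "-query", "m_cold.fasta", "-db", "nr", "-evalue", "0.001"],
+            capture_output=True, text=True, check=True,
+        )
+        print(result.stdout)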
+    """
+
+    def __init__(self, cmd="blastx", **kwargs):
+        """Initialize the class."""
+        self.parameters = [
+            # Input query options:
+            _Option(
+                ["-task", "task"],
+                "Task to execute (string, blastx (default) or blastx-fast).",
+                checker_function=lambda value: value in ["blastx", "blastx-fast"],
+                equate=False,
+            ),
+            _Option(
+                ["-strand", "strand"],
+                "Query strand(s) to search against database/subject.\n\n"
+                'Values allowed are "both" (default), "minus", "plus".',
+                checker_function=lambda value: value in ["both", "minus", "plus"],
+                equate=False,
+            ),
+            # Input query options:
+            _Option(
+                ["-query_gencode", "query_gencode"],
+                "Genetic code to use to translate query (integer, default 1).",
+                equate=False,
+            ),
+            # General search options:
+            _Option(
+                ["-frame_shift_penalty", "frame_shift_penalty"],
+                "Frame shift penalty (integer, at least 1, default ignored) (OBSOLETE).\n\n"
+                "This was removed in BLAST 2.2.27+",
+                equate=False,
+            ),
+            _Option(
+                ["-max_intron_length", "max_intron_length"],
+                "Maximum intron length (integer).\n\n"
+                "Length of the largest intron allowed in a translated nucleotide "
+                "sequence when linking multiple distinct alignments (a negative "
+                "value disables linking). Default zero.",
+                equate=False,
+            ),
+            _Option(
+                ["-matrix", "matrix"],
+                "Scoring matrix name (default BLOSUM62).",
+                equate=False,
+            ),
+            _Option(
+                ["-threshold", "threshold"],
+                "Minimum score for words to be added to the BLAST lookup table (float).",
+                equate=False,
+            ),
+            _Option(
+                ["-comp_based_stats", "comp_based_stats"],
+                "Use composition-based statistics for blastp, blastx, or tblastn.\n\n"
+                "D or d: default (equivalent to 2)\n\n"
+                "0 or F or f: no composition-based statistics\n\n"
+                "1: Composition-based statistics as in NAR 29:2994-3005, 2001\n\n"
+                "2 or T or t: Composition-based score adjustment as in "
+                "Bioinformatics 21:902-911, 2005, conditioned on sequence "
+                "properties\n\n"
+                "3: Composition-based score adjustment as in Bioinformatics "
+                "21:902-911, 2005, unconditionally.\n\n"
+                "For programs other than tblastn, must either be absent or be "
+                "D, F or 0.\n\n"
+                "Default = 2.",
+                equate=False,
+            ),
+            # Query filtering options:
+            _Option(
+                ["-seg", "seg"],
+                "Filter query sequence with SEG (string).\n\n"
+                'Format: "yes", "window locut hicut", or "no" to disable.\n\n'
+                'Default is "12 2.2 2.5"',
+                equate=False,
+            ),
+            # Extension options:
+            _Switch(["-ungapped", "ungapped"], "Perform ungapped alignment only?"),
+            _Switch(
+                ["-use_sw_tback", "use_sw_tback"],
+                "Compute locally optimal Smith-Waterman alignments?",
+            ),
+        ]
+        _NcbiblastMain2SeqCommandline.__init__(self, cmd, **kwargs)
+
+
+class NcbitblastnCommandline(_NcbiblastMain2SeqCommandline):
+    """Wrapper for the NCBI BLAST+ program tblastn.
+
+    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
+    replaced the old blastall tool with separate tools for each of the searches.
+    This wrapper therefore replaces BlastallCommandline with option -p tblastn.
+
+    >>> from Bio.Blast.Applications import NcbitblastnCommandline
+    >>> cline = NcbitblastnCommandline(help=True)
+    >>> cline
+    NcbitblastnCommandline(cmd='tblastn', help=True)
+    >>> print(cline)
+    tblastn -help
+
+    You would typically run the command line with cline() or via the Python
+    subprocess module, as described in the Biopython tutorial.
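+
+    If the tool exits with a non-zero return code, calling the wrapper raises
+    an ApplicationError carrying the captured output (a minimal sketch; the
+    file names are hypothetical and tblastn is assumed to be installed)::
+
+        from Bio.Application import ApplicationError
+        from Bio.Blast.Applications import NcbitblastnCommandline
+        cline = NcbitblastnCommandline(query="proteins.fasta", db="nt",
+                                       evalue=0.001, outfmt=5, out="hits.xml")
+        try:
+            stdout, stderr = cline()
+        except ApplicationError as err:
+            print(err.returncode, err.stderr)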
+    """
+
+    def __init__(self, cmd="tblastn", **kwargs):
+        """Initialize the class."""
+        self.parameters = [
+            # General search options:
+            _Option(
+                ["-task", "task"],
+                "Task to execute (string, tblastn (default) or tblastn-fast).",
+                checker_function=lambda value: value in ["tblastn", "tblastn-fast"],
+                equate=False,
+            ),
+            _Option(
+                ["-db_gencode", "db_gencode"],
+                "Genetic code to use to translate database/subjects (integer, default 1).",
+                equate=False,
+            ),
+            _Option(
+                ["-frame_shift_penalty", "frame_shift_penalty"],
+                "Frame shift penalty (integer, at least 1, default ignored) (OBSOLETE).\n\n"
+                "This was removed in BLAST 2.2.27+",
+                equate=False,
+            ),
+            _Option(
+                ["-max_intron_length", "max_intron_length"],
+                "Maximum intron length (integer).\n\n"
+                "Length of the largest intron allowed in a translated nucleotide "
+                "sequence when linking multiple distinct alignments (a negative "
+                "value disables linking). Default zero.",
+                equate=False,
+            ),
+            _Option(
+                ["-matrix", "matrix"],
+                "Scoring matrix name (default BLOSUM62).",
+                equate=False,
+            ),
+            _Option(
+                ["-threshold", "threshold"],
+                "Minimum score for words to be added to the BLAST lookup table (float).",
+                equate=False,
+            ),
+            _Option(
+                ["-comp_based_stats", "comp_based_stats"],
+                "Use composition-based statistics (string, default 2, i.e. True).\n\n"
+                "0, F or f: no composition-based statistics\n\n"
+                "1: Composition-based statistics as in NAR 29:2994-3005, 2001\n\n"
+                "2, T or t, D or d: Composition-based score adjustment as in "
+                "Bioinformatics 21:902-911, 2005, conditioned on sequence properties\n\n"
+                "3: Composition-based score adjustment as in Bioinformatics 21:902-911, "
+                "2005, unconditionally\n\n"
+                "Note that only tblastn supports values of 1 and 3.",
+                checker_function=lambda value: value in "0Ft12TtDd3",
+                equate=False,
+            ),
+            # Query filtering options:
+            _Option(
+                ["-seg", "seg"],
+                "Filter query sequence with SEG (string).\n\n"
+                'Format: "yes", "window locut hicut", or "no" to disable.\n\n'
+                'Default is "12 2.2 2.5"',
+                equate=False,
+            ),
+            # Extension options:
+            _Switch(["-ungapped", "ungapped"], "Perform ungapped alignment only?"),
+            # Miscellaneous options:
+            _Switch(
+                ["-use_sw_tback", "use_sw_tback"],
+                "Compute locally optimal Smith-Waterman alignments?",
+            ),
+            # PSI-TBLASTN options:
+            _Option(
+                ["-in_pssm", "in_pssm"],
+                "PSI-BLAST checkpoint file.\n\nIncompatible with: remote, query",
+                filename=True,
+                equate=False,
+            ),
+        ]
+        _NcbiblastMain2SeqCommandline.__init__(self, cmd, **kwargs)
+
+
+class NcbitblastxCommandline(_NcbiblastMain2SeqCommandline):
+    """Wrapper for the NCBI BLAST+ program tblastx.
+
+    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
+    replaced the old blastall tool with separate tools for each of the searches.
+    This wrapper therefore replaces BlastallCommandline with option -p tblastx.
+
+    >>> from Bio.Blast.Applications import NcbitblastxCommandline
+    >>> cline = NcbitblastxCommandline(help=True)
+    >>> cline
+    NcbitblastxCommandline(cmd='tblastx', help=True)
+    >>> print(cline)
+    tblastx -help
+
+    You would typically run the command line with cline() or via the Python
+    subprocess module, as described in the Biopython tutorial.
+    """
+
+    def __init__(self, cmd="tblastx", **kwargs):
+        """Initialize the class."""
+        self.parameters = [
+            # Input query options:
+            _Option(
+                ["-strand", "strand"],
+                "Query strand(s) to search against database/subject.\n\n"
+                'Values allowed are "both" (default), "minus", "plus".',
+                checker_function=lambda value: value in ["both", "minus", "plus"],
+                equate=False,
+            ),
+            # Input query options:
+            _Option(
+                ["-query_gencode", "query_gencode"],
+                "Genetic code to use to translate query (integer, default 1).",
+                equate=False,
+            ),
+            # General search options:
+            _Option(
+                ["-db_gencode", "db_gencode"],
+                "Genetic code to use to translate database/subjects (integer, default 1).",
+                equate=False,
+            ),
+            _Option(
+                ["-max_intron_length", "max_intron_length"],
+                "Maximum intron length (integer).\n\n"
+                "Length of the largest intron allowed in a translated nucleotide "
+                "sequence when linking multiple distinct alignments (a negative "
+                "value disables linking). Default zero.",
+                equate=False,
+            ),
+            _Option(
+                ["-matrix", "matrix"],
+                "Scoring matrix name (default BLOSUM62).",
+                equate=False,
+            ),
+            _Option(
+                ["-threshold", "threshold"],
+                "Minimum score for words to be added to the BLAST lookup table (float).",
+                equate=False,
+            ),
+            # Query filtering options:
+            _Option(
+                ["-seg", "seg"],
+                "Filter query sequence with SEG (string).\n\n"
+                'Format: "yes", "window locut hicut", or "no" to disable.\n\n'
+                'Default is "12 2.2 2.5"',
+                equate=False,
+            ),
+        ]
+        _NcbiblastMain2SeqCommandline.__init__(self, cmd, **kwargs)
+
+
+class NcbipsiblastCommandline(_Ncbiblast2SeqCommandline):
+    """Wrapper for the NCBI BLAST+ program psiblast.
+
+    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
+    replaced the old blastpgp tool with a similar tool psiblast. This wrapper
+    therefore replaces BlastpgpCommandline, the wrapper for blastpgp.
+
+    >>> from Bio.Blast.Applications import NcbipsiblastCommandline
+    >>> cline = NcbipsiblastCommandline(help=True)
+    >>> cline
+    NcbipsiblastCommandline(cmd='psiblast', help=True)
+    >>> print(cline)
+    psiblast -help
+
+    You would typically run the command line with cline() or via the Python
+    subprocess module, as described in the Biopython tutorial.
+    """
+
+    def __init__(self, cmd="psiblast", **kwargs):
+        """Initialize the class."""
+        self.parameters = [
+            # General search options:
+            _Option(
+                ["-matrix", "matrix"],
+                "Scoring matrix name (default BLOSUM62).",
+                equate=False,
+            ),
+            _Option(
+                ["-threshold", "threshold"],
+                "Minimum score for words to be added to the BLAST lookup table (float).",
+                equate=False,
+            ),
+            _Option(
+                ["-comp_based_stats", "comp_based_stats"],
+                "Use composition-based statistics (string, default 2, i.e. True).\n\n"
+                "0, F or f: no composition-based statistics\n\n"
+                "2, T or t, D or d: Composition-based score adjustment as in "
+                "Bioinformatics 21:902-911, 2005, conditioned on sequence properties\n\n"
+                "Note that tblastn also supports values of 1 and 3.",
+                checker_function=lambda value: value in "0Ft2TtDd",
+                equate=False,
+            ),
+            # Query filtering options:
+            _Option(
+                ["-seg", "seg"],
+                "Filter query sequence with SEG (string).\n\n"
+                'Format: "yes", "window locut hicut", or "no" to disable.\n\n'
+                'Default is "12 2.2 2.5"',
+                equate=False,
+            ),
+            # Extension options:
+            _Option(
+                ["-gap_trigger", "gap_trigger"],
+                "Number of bits to trigger gapping (float, default 22).",
+                equate=False,
+            ),
+            # Miscellaneous options:
+            _Switch(
+                ["-use_sw_tback", "use_sw_tback"],
+                "Compute locally optimal Smith-Waterman alignments?",
+            ),
+            # PSI-BLAST options:
+            _Option(
+                ["-num_iterations", "num_iterations"],
+                "Number of iterations to perform (integer, at least one).\n\n"
+                "Default is one. Incompatible with: remote",
+                equate=False,
+            ),
+            _Option(
+                ["-out_pssm", "out_pssm"],
+                "File name to store checkpoint file.",
+                filename=True,
+                equate=False,
+            ),
+            _Option(
+                ["-out_ascii_pssm", "out_ascii_pssm"],
+                "File name to store ASCII version of PSSM.",
+                filename=True,
+                equate=False,
+            ),
+            _Switch(
+                ["-save_pssm_after_last_round", "save_pssm_after_last_round"],
+                "Save PSSM after the last database search.",
+            ),
+            _Switch(
+                ["-save_each_pssm", "save_each_pssm"],
+                "Save PSSM after each iteration.\n\n"
+                "File name is given in -out_pssm or -out_ascii_pssm options.",
+            ),
+            _Option(
+                ["-in_msa", "in_msa"],
+                "File name of multiple sequence alignment to restart PSI-BLAST.\n\n"
+                "Incompatible with: in_pssm, query",
+                filename=True,
+                equate=False,
+            ),
+            _Option(
+                ["-msa_master_idx", "msa_master_idx"],
+                "Index of sequence to use as master in MSA.\n\n"
+                "Index (1-based) of sequence to use as the master in the multiple "
+                "sequence alignment. If not specified, the first sequence is used.",
+                equate=False,
+            ),
+            _Option(
+                ["-in_pssm", "in_pssm"],
+                "PSI-BLAST checkpoint file.\n\n"
+                "Incompatible with: in_msa, query, phi_pattern",
+                filename=True,
+                equate=False,
+            ),
+            # PSSM engine options:
+            _Option(
+                ["-pseudocount", "pseudocount"],
+                "Pseudo-count value used when constructing PSSM.\n\n"
+                "Integer. Default is zero.",
+                equate=False,
+            ),
+            _Option(
+                ["-inclusion_ethresh", "inclusion_ethresh"],
+                "E-value inclusion threshold for pairwise alignments (float, default 0.002).",
+                equate=False,
+            ),
+            _Switch(
+                ["-ignore_msa_master", "ignore_msa_master"],
+                "Ignore the master sequence when creating PSSM.\n\n"
+                "Requires: in_msa.\n"
+                "Incompatible with: msa_master_idx, in_pssm, query, query_loc, "
+                "phi_pattern",
+            ),
+            # PHI-BLAST options:
+            _Option(
+                ["-phi_pattern", "phi_pattern"],
+                "File name containing pattern to search.\n\n"
+                "Incompatible with: in_pssm",
+                filename=True,
+                equate=False,
+            ),
+        ]
+        _Ncbiblast2SeqCommandline.__init__(self, cmd, **kwargs)
+
+    def _validate(self):
+        incompatibles = {
+            "num_iterations": ["remote"],
+            "in_msa": ["in_pssm", "query"],
+            "in_pssm": ["in_msa", "query", "phi_pattern"],
+            "ignore_msa_master": [
+                "msa_master_idx",
+                "in_pssm",
+                "query",
+                "query_loc",
+                "phi_pattern",
+            ],
+        }
+        self._validate_incompatibilities(incompatibles)
+        _Ncbiblast2SeqCommandline._validate(self)
+
+
+class NcbirpsblastCommandline(_NcbiblastCommandline):
+    """Wrapper for the NCBI BLAST+ program rpsblast.
+
+    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
+    replaced the old rpsblast tool with a similar tool of the same name. This
+    wrapper replaces RpsBlastCommandline, the wrapper for the old rpsblast.
+
+    >>> from Bio.Blast.Applications import NcbirpsblastCommandline
+    >>> cline = NcbirpsblastCommandline(help=True)
+    >>> cline
+    NcbirpsblastCommandline(cmd='rpsblast', help=True)
+    >>> print(cline)
+    rpsblast -help
+
+    You would typically run the command line with cline() or via the Python
+    subprocess module, as described in the Biopython tutorial.
+    """
+
+    def __init__(self, cmd="rpsblast", **kwargs):
+        """Initialize the class."""
+        # TODO - remove the -word_size argument as per BLAST+ 2.2.30
+        # (BLAST team say it should never have been included, since
+        # the word size is set when building the domain database.)
+        # This likely means reviewing the class hierarchy again.
+        self.parameters = [
+            # Query filtering options:
+            _Option(
+                ["-seg", "seg"],
+                "Filter query sequence with SEG (string).\n\n"
+                'Format: "yes", "window locut hicut", or "no" to disable.\n\n'
+                'Default is "12 2.2 2.5"',
+                equate=False,
+            ),
+            # Restrict search or results:
+            _Option(
+                ["-culling_limit", "culling_limit"],
+                "Hit culling limit (integer).\n\n"
+                "If the query range of a hit is enveloped by that of at "
+                "least this many higher-scoring hits, delete the hit.\n\n"
+                "Incompatible with: best_hit_overhang, best_hit_score_edge.",
+                equate=False,
+            ),
+            _Option(
+                ["-best_hit_overhang", "best_hit_overhang"],
+                "Best Hit algorithm overhang value (recommended value: 0.1).\n\n"
+                "Float between 0.0 and 0.5 inclusive. "
+                "Incompatible with: culling_limit.",
+                equate=False,
+            ),
+            _Option(
+                ["-best_hit_score_edge", "best_hit_score_edge"],
+                "Best Hit algorithm score edge value (recommended value: 0.1).\n\n"
+                "Float between 0.0 and 0.5 inclusive. "
+                "Incompatible with: culling_limit.",
+                equate=False,
+            ),
+            # General search options:
+            _Option(
+                ["-comp_based_stats", "comp_based_stats"],
+                "Use composition-based statistics.\n\n"
+                "D or d: default (equivalent to 0)\n\n"
+                "0 or F or f: Simplified Composition-based statistics as in "
+                "Bioinformatics 15:1000-1011, 1999\n\n"
+                "1 or T or t: Composition-based statistics as in NAR 29:2994-3005, "
+                "2001\n\n"
+                "Default = 0.",
+                checker_function=lambda value: value in "Dd0Ff1Tt",
+                equate=False,
+            ),
+            # Misc options:
+            _Switch(
+                ["-use_sw_tback", "use_sw_tback"],
+                "Compute locally optimal Smith-Waterman alignments?",
+            ),
+        ]
+        _NcbiblastCommandline.__init__(self, cmd, **kwargs)
+
+    def _validate(self):
+        incompatibles = {"culling_limit": ["best_hit_overhang", "best_hit_score_edge"]}
+        self._validate_incompatibilities(incompatibles)
+        _NcbiblastCommandline._validate(self)
+
+
+class NcbirpstblastnCommandline(_NcbiblastCommandline):
+    """Wrapper for the NCBI BLAST+ program rpstblastn.
+
+    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
+    replaced the old rpsblast tool with a similar tool of the same name, and a
+    separate tool rpstblastn for Translated Reverse Position Specific BLAST.
+
+    >>> from Bio.Blast.Applications import NcbirpstblastnCommandline
+    >>> cline = NcbirpstblastnCommandline(help=True)
+    >>> cline
+    NcbirpstblastnCommandline(cmd='rpstblastn', help=True)
+    >>> print(cline)
+    rpstblastn -help
+
+    You would typically run the command line with cline() or via the Python
+    subprocess module, as described in the Biopython tutorial.
+    """
+
+    def __init__(self, cmd="rpstblastn", **kwargs):
+        """Initialize the class."""
+        # TODO - remove the -word_size argument as per BLAST+ 2.2.30
+        # (BLAST team say it should never have been included, since
+        # the word size is set when building the domain database.)
+        # This likely means reviewing the class hierarchy again.
+        self.parameters = [
+            # Input query options:
+            _Option(
+                ["-strand", "strand"],
+                "Query strand(s) to search against database/subject.\n\n"
+                'Values allowed are "both" (default), "minus", "plus".',
+                checker_function=lambda value: value in ["both", "minus", "plus"],
+                equate=False,
+            ),
+            # Input query options:
+            _Option(
+                ["-query_gencode", "query_gencode"],
+                "Genetic code to use to translate query (integer, default 1).",
+                equate=False,
+            ),
+            # Query filtering options:
+            _Option(
+                ["-seg", "seg"],
+                "Filter query sequence with SEG (string).\n\n"
+                'Format: "yes", "window locut hicut", or "no" to disable. '
+                'Default is "12 2.2 2.5"',
+                equate=False,
+            ),
+            # General search options:
+            _Option(
+                ["-comp_based_stats", "comp_based_stats"],
+                "Use composition-based statistics.\n\n"
+                "D or d: default (equivalent to 0)\n\n"
+                "0 or F or f: Simplified Composition-based statistics as in "
+                "Bioinformatics 15:1000-1011, 1999\n\n"
+                "1 or T or t: Composition-based statistics as in NAR 29:2994-3005, "
+                "2001\n\n"
+                "Default = 0.",
+                checker_function=lambda value: value in "Dd0Ff1Tt",
+                equate=False,
+            ),
+            # Extension options:
+            _Switch(["-ungapped", "ungapped"], "Perform ungapped alignment only?"),
+            # Miscellaneous options:
+            _Switch(
+                ["-use_sw_tback", "use_sw_tback"],
+                "Compute locally optimal Smith-Waterman alignments?",
+            ),
+        ]
+        _NcbiblastCommandline.__init__(self, cmd, **kwargs)
+
+
+class NcbiblastformatterCommandline(_NcbibaseblastCommandline):
+    """Wrapper for the NCBI BLAST+ program blast_formatter.
+
+    With the release of BLAST 2.2.24+ (i.e. the BLAST suite rewritten in C++
+    instead of C), the NCBI added the ASN.1 output format option to all the
+    search tools, and extended the blast_formatter to support this as input.
+
+    The blast_formatter command allows you to convert the ASN.1 output into
+    the other output formats (XML, tabular, plain text, HTML).
+
+    >>> from Bio.Blast.Applications import NcbiblastformatterCommandline
+    >>> cline = NcbiblastformatterCommandline(archive="example.asn", outfmt=5, out="example.xml")
+    >>> cline
+    NcbiblastformatterCommandline(cmd='blast_formatter', out='example.xml', outfmt=5, archive='example.asn')
+    >>> print(cline)
+    blast_formatter -out example.xml -outfmt 5 -archive example.asn
+
+    You would typically run the command line with cline() or via the Python
+    subprocess module, as described in the Biopython tutorial.
+
+    Note that this wrapper is for the version of blast_formatter from BLAST
+    2.2.24+ (or later), which is when the NCBI first announced the inclusion
+    of this tool. There was actually an early version in BLAST 2.2.23+ (and
+    possibly in older releases) but this did not have the -archive option
+    (instead -rid is a mandatory argument), and is not supported by this
+    wrapper.
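+
+    To produce the ASN.1 archive in the first place, run one of the search
+    tools with ``-outfmt 11``, for example (illustrative file names)::
+
+        blastp -query example.fasta -db nr -outfmt 11 -out example.asn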
+ """ + + def __init__(self, cmd="blast_formatter", **kwargs): + """Initialize the class.""" + self.parameters = [ + # Input options + _Option( + ["-rid", "rid"], + "BLAST Request ID (RID), not compatible with archive arg.", + equate=False, + ), + _Option( + ["-archive", "archive"], + "Archive file of results, not compatible with rid arg.", + filename=True, + equate=False, + ), + # Restrict search or results + _Option( + ["-max_target_seqs", "max_target_seqs"], + "Maximum number of aligned sequences to keep.", + checker_function=lambda value: value >= 1, + equate=False, + ), + ] + _NcbibaseblastCommandline.__init__(self, cmd, **kwargs) + + def _validate(self): + incompatibles = {"rid": ["archive"]} + self._validate_incompatibilities(incompatibles) + _NcbibaseblastCommandline._validate(self) + + +class NcbideltablastCommandline(_Ncbiblast2SeqCommandline): + """Create a commandline for the NCBI BLAST+ program deltablast (for proteins). + + This is a wrapper for the deltablast command line command included in + the NCBI BLAST+ software (not present in the original BLAST). + + >>> from Bio.Blast.Applications import NcbideltablastCommandline + >>> cline = NcbideltablastCommandline(query="rosemary.pro", db="nr", + ... evalue=0.001, remote=True) + >>> cline + NcbideltablastCommandline(cmd='deltablast', query='rosemary.pro', db='nr', evalue=0.001, remote=True) + >>> print(cline) + deltablast -query rosemary.pro -db nr -evalue 0.001 -remote + + You would typically run the command line with cline() or via the Python + subprocess module, as described in the Biopython tutorial. + """ + + def __init__(self, cmd="deltablast", **kwargs): + """Initialize the class.""" + self.parameters = [ + # General search options: + _Option(["-matrix", "matrix"], "Scoring matrix name (default BLOSUM62)."), + _Option( + ["-threshold", "threshold"], + "Minimum score for words to be added to the BLAST lookup table (float).", + equate=False, + ), + _Option( + ["-comp_based_stats", "comp_based_stats"], + "Use composition-based statistics (string, default 2, i.e. True).\n\n" + "0, F or f: no composition-based statistics.\n\n" + "2, T or t, D or d : Composition-based score adjustment as in " + "Bioinformatics 21:902-911, 2005, conditioned on sequence properties\n\n" + "Note that tblastn also supports values of 1 and 3.", + checker_function=lambda value: value in "0Ft2TtDd", + equate=False, + ), + # Query filtering options: + _Option( + ["-seg", "seg"], + "Filter query sequence with SEG (string).\n\n" + 'Format: "yes", "window locut hicut", or "no" to disable. ' + 'Default is "12 2.2 2.5"', + equate=False, + ), + # Extension options: + _Option( + ["-gap_trigger", "gap_trigger"], + "Number of bits to trigger gapping. Default = 22.", + equate=False, + ), + # Miscellaneous options: + _Switch( + ["-use_sw_tback", "use_sw_tback"], + "Compute locally optimal Smith-Waterman alignments?", + ), + # PSI-BLAST options + _Option( + ["-num_iterations", "num_iterations"], + "Number of iterations to perform. 
(integer >=1, Default is 1).\n\n"
+            "Incompatible with: remote",
+            equate=False,
+        ),
+        _Option(
+            ["-out_pssm", "out_pssm"],
+            "File name to store checkpoint file.",
+            filename=True,
+            equate=False,
+        ),
+        _Option(
+            ["-out_ascii_pssm", "out_ascii_pssm"],
+            "File name to store ASCII version of PSSM.",
+            filename=True,
+            equate=False,
+        ),
+        _Switch(
+            ["-save_pssm_after_last_round", "save_pssm_after_last_round"],
+            "Save PSSM after the last database search.",
+        ),
+        _Switch(
+            ["-save_each_pssm", "save_each_pssm"],
+            "Save PSSM after each iteration.\n\n"
+            "File name is given in -save_pssm or -save_ascii_pssm options.",
+        ),
+        # PSSM engine options
+        _Option(
+            ["-pseudocount", "pseudocount"],
+            "Pseudo-count value used when constructing PSSM (integer, default 0).",
+            equate=False,
+        ),
+        _Option(
+            ["-domain_inclusion_ethresh", "domain_inclusion_ethresh"],
+            "E-value inclusion threshold for alignments with conserved domains.\n\n"
+            "(float, Default is 0.05)",
+            equate=False,
+        ),
+        _Option(
+            ["-inclusion_ethresh", "inclusion_ethresh"],
+            "Pairwise alignment e-value inclusion threshold (float, default 0.002).",
+            equate=False,
+        ),
+        # DELTA-BLAST options
+        _Option(
+            ["-rpsdb", "rpsdb"],
+            "BLAST domain database name (string, default 'cdd_delta').",
+            equate=False,
+        ),
+        _Switch(
+            ["-show_domain_hits", "show_domain_hits"],
+            "Show domain hits?\n\nIncompatible with: remote, subject",
+        ),
+    ]
+    _Ncbiblast2SeqCommandline.__init__(self, cmd, **kwargs)
+
+
+class NcbimakeblastdbCommandline(AbstractCommandline):
+    """Wrapper for the NCBI BLAST+ program makeblastdb.
+
+    This is a wrapper for the NCBI BLAST+ makeblastdb application
+    to create BLAST databases. By default, this creates a blast database
+    with the same name as the input file. The default output location
+    is the same directory as the input.
+
+    >>> from Bio.Blast.Applications import NcbimakeblastdbCommandline
+    >>> cline = NcbimakeblastdbCommandline(dbtype="prot",
+    ...                                    input_file="NC_005816.faa")
+    >>> cline
+    NcbimakeblastdbCommandline(cmd='makeblastdb', dbtype='prot', input_file='NC_005816.faa')
+    >>> print(cline)
+    makeblastdb -dbtype prot -in NC_005816.faa
+
+    You would typically run the command line with cline() or via the Python
+    subprocess module, as described in the Biopython tutorial.
+    """
+
+    def __init__(self, cmd="makeblastdb", **kwargs):
+        """Initialize the class."""
+        self.parameters = [
+            # Basic input options
+            _Switch(
+                ["-h", "h"], "Print USAGE and DESCRIPTION; ignore other arguments."
+            ),
+            _Switch(
+                ["-help", "help"],
+                "Print USAGE, DESCRIPTION and ARGUMENTS description; "
+                "ignore other arguments.",
+            ),
+            _Switch(
+                ["-version", "version"],
+                "Print version number; ignore other arguments.",
+            ),
+            # Output configuration options
+            _Option(
+                ["-out", "out"],
+                "Name of the BLAST database to be created.",
+                filename=True,
+                equate=False,
+            ),
+            # makeblastdb specific options
+            _Option(
+                ["-blastdb_version", "blastdb_version"],
+                "Version of BLAST database to be created. "
+                "Tip: use BLAST database version 4 on 32 bit CPU. "
+                "Default = 5.",
+                equate=False,
+                checker_function=lambda x: x == 4 or x == 5,
+            ),
+            _Option(
+                ["-dbtype", "dbtype"],
+                "Molecule type of target db ('nucl' or 'prot').",
+                equate=False,
+                is_required=True,
+                checker_function=lambda x: x == "nucl" or x == "prot",
+            ),
+            _Option(
+                ["-in", "input_file"],
+                "Input file/database name.",
+                filename=True,
+                equate=False,
+            ),
+            _Option(
+                ["-input_type", "input_type"],
+                "Type of the data specified in input_file.\n\n"
+                "Default = 'fasta'. Added in BLAST 2.2.26.",
+                filename=False,
+                equate=False,
+                checker_function=self._input_type_checker,
+            ),
+            _Option(
+                ["-title", "title"],
+                "Title for BLAST database.",
+                filename=False,
+                equate=False,
+            ),
+            _Switch(
+                ["-parse_seqids", "parse_seqids"],
+                "Option to parse seqid for FASTA input if set.\n\n"
+                "For all other input types, seqids are parsed automatically.",
+            ),
+            _Switch(
+                ["-hash_index", "hash_index"], "Create index of sequence hash values."
+            ),
+            _Option(
+                ["-mask_data", "mask_data"],
+                "Comma-separated list of input files containing masking "
+                "data as produced by NCBI masking applications "
+                "(e.g. dustmasker, segmasker, windowmasker).",
+                filename=True,
+                equate=False,
+            ),
+            _Option(
+                ["-mask_id", "mask_id"],
+                "Comma-separated list of strings to uniquely identify the "
+                "masking algorithm.",
+                filename=False,
+                equate=False,
+            ),
+            _Option(
+                ["-mask_desc", "mask_desc"],
+                "Comma-separated list of free form strings to describe "
+                "the masking algorithm details.",
+                filename=False,
+                equate=False,
+            ),
+            _Switch(["-gi_mask", "gi_mask"], "Create GI indexed masking data."),
+            _Option(
+                ["-gi_mask_name", "gi_mask_name"],
+                "Comma-separated list of masking data output files.",
+                filename=False,
+                equate=False,
+            ),
+            _Option(
+                ["-max_file_sz", "max_file_sz"],
+                "Maximum file size for BLAST database files. Default = '1GB'.",
+                filename=False,
+                equate=False,
+            ),
+            _Option(
+                ["-logfile", "logfile"],
+                "File to which the program log should be redirected.",
+                filename=True,
+                equate=False,
+            ),
+            _Option(
+                ["-taxid", "taxid"],
+                "Taxonomy ID to assign to all sequences.",
+                filename=False,
+                equate=False,
+                checker_function=lambda x: type(x)(int(x)) == x,
+            ),
+            _Option(
+                ["-taxid_map", "taxid_map"],
+                "Text file mapping sequence IDs to taxonomy IDs.\n\n"
+                "Format:<SequenceId> <TaxonomyId><newline>",
+                filename=True,
+                equate=False,
+            ),
+        ]
+        AbstractCommandline.__init__(self, cmd, **kwargs)
+
+    def _input_type_checker(self, command):
+        return command in ("asn1_bin", "asn1_txt", "blastdb", "fasta")
+
+    def _validate(self):
+        incompatibles = {
+            "mask_id": ["gi_mask"],
+            "gi_mask": ["mask_id"],
+            "taxid": ["taxid_map"],
+        }
+
+        # Copied from _NcbibaseblastCommandline class above.
+        # Code repeated here for python2 and 3 compatibility,
+        # because this is not a _NcbibaseblastCommandline subclass.
+        for a in incompatibles:
+            if self._get_parameter(a):
+                for b in incompatibles[a]:
+                    if self._get_parameter(b):
+                        raise ValueError("Options %s and %s are incompatible." % (a, b))
+
+        if self.mask_id and not self.mask_data:
+            raise ValueError("Option mask_id requires mask_data to be set.")
+        if self.mask_desc and not self.mask_id:
+            raise ValueError("Option mask_desc requires mask_id to be set.")
+        if self.gi_mask and not self.parse_seqids:
+            raise ValueError("Option gi_mask requires parse_seqids to be set.")
+        if self.gi_mask_name and not (self.mask_data and self.gi_mask):
+            raise ValueError(
+                "Option gi_mask_name requires mask_data and gi_mask to be set."
+ ) + if self.taxid_map and not self.parse_seqids: + raise ValueError("Option taxid_map requires parse_seqids to be set.") + AbstractCommandline._validate(self) + + +def _test(): + """Run the Bio.Blast.Applications module's doctests (PRIVATE).""" + import doctest + + doctest.testmod(verbose=1) + + +if __name__ == "__main__": + # Run the doctests + _test() diff --git a/code/lib/Bio/Blast/NCBIWWW.py b/code/lib/Bio/Blast/NCBIWWW.py new file mode 100644 index 0000000..4bcca3f --- /dev/null +++ b/code/lib/Bio/Blast/NCBIWWW.py @@ -0,0 +1,348 @@ +# Copyright 1999 by Jeffrey Chang. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +# +# Patched by Brad Chapman. +# Chris Wroe added modifications for work in myGrid + +"""Code to invoke the NCBI BLAST server over the internet. + +This module provides code to work with the WWW version of BLAST +provided by the NCBI. https://blast.ncbi.nlm.nih.gov/ +""" + + +import warnings + +from io import StringIO +import time + +from urllib.request import urlopen +from urllib.parse import urlencode +from urllib.request import Request + +from Bio import BiopythonWarning + + +NCBI_BLAST_URL = "https://blast.ncbi.nlm.nih.gov/Blast.cgi" + + +def qblast( + program, + database, + sequence, + url_base=NCBI_BLAST_URL, + auto_format=None, + composition_based_statistics=None, + db_genetic_code=None, + endpoints=None, + entrez_query="(none)", + expect=10.0, + filter=None, + gapcosts=None, + genetic_code=None, + hitlist_size=50, + i_thresh=None, + layout=None, + lcase_mask=None, + matrix_name=None, + nucl_penalty=None, + nucl_reward=None, + other_advanced=None, + perc_ident=None, + phi_pattern=None, + query_file=None, + query_believe_defline=None, + query_from=None, + query_to=None, + searchsp_eff=None, + service=None, + threshold=None, + ungapped_alignment=None, + word_size=None, + short_query=None, + alignments=500, + alignment_view=None, + descriptions=500, + entrez_links_new_window=None, + expect_low=None, + expect_high=None, + format_entrez_query=None, + format_object=None, + format_type="XML", + ncbi_gi=None, + results_file=None, + show_overview=None, + megablast=None, + template_type=None, + template_length=None, +): + """BLAST search using NCBI's QBLAST server or a cloud service provider. + + Supports all parameters of the old qblast API for Put and Get. + + Please note that NCBI uses the new Common URL API for BLAST searches + on the internet (http://ncbi.github.io/blast-cloud/dev/api.html). Thus, + some of the parameters used by this function are not (or are no longer) + officially supported by NCBI. Although they are still functioning, this + may change in the future. + + The Common URL API (http://ncbi.github.io/blast-cloud/dev/api.html) allows + doing BLAST searches on cloud servers. To use this feature, please set + ``url_base='http://host.my.cloud.service.provider.com/cgi-bin/blast.cgi'`` + and ``format_object='Alignment'``. For more details, please see + https://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE_TYPE=BlastDocs&DOC_TYPE=CloudBlast + + Some useful parameters: + + - program blastn, blastp, blastx, tblastn, or tblastx (lower case) + - database Which database to search against (e.g. "nr"). + - sequence The sequence to search. + - ncbi_gi TRUE/FALSE whether to give 'gi' identifier. + - descriptions Number of descriptions to show. Def 500. 
+     - alignments     Number of alignments to show.  Def 500.
+     - expect         An expect value cutoff.  Def 10.0.
+     - matrix_name    Specify an alt. matrix (PAM30, PAM70, BLOSUM80, BLOSUM45).
+     - filter         "none" turns off filtering.  Default no filtering.
+     - format_type    "HTML", "Text", "ASN.1", or "XML".  Def. "XML".
+     - entrez_query   Entrez query to limit Blast search.
+     - hitlist_size   Number of hits to return.  Default 50.
+     - megablast      TRUE/FALSE whether to use Mega BLAST algorithm (blastn only)
+     - short_query    TRUE/FALSE whether to adjust the search parameters for a
+                      short query sequence. Note that this will override
+                      manually set parameters like word size and e value. Turns
+                      off when sequence length is > 30 residues. Default: None.
+     - service        plain, psi, phi, rpsblast, megablast (lower case)
+
+    This function does no checking of the validity of the parameters
+    and passes the values to the server as is. More help is available at:
+    https://ncbi.github.io/blast-cloud/dev/api.html
+
+    """
+    programs = ["blastn", "blastp", "blastx", "tblastn", "tblastx"]
+    if program not in programs:
+        raise ValueError(
+            "Program specified is %s. Expected one of %s"
+            % (program, ", ".join(programs))
+        )
+
+    # SHORT_QUERY_ADJUST throws an error when using blastn (wrong parameter
+    # assignment from NCBI's side).
+    # Thus we set the (known) parameters directly:
+    if short_query and program == "blastn":
+        short_query = None
+        # We only use the 'short-query' parameters for short sequences:
+        if len(sequence) < 31:
+            expect = 1000
+            word_size = 7
+            nucl_reward = 1
+            filter = None
+            lcase_mask = None
+            warnings.warn(
+                '"SHORT_QUERY_ADJUST" is incorrectly implemented (by NCBI) for blastn.'
+                " We bypass the problem by manually adjusting the search parameters."
+                " Thus, results may slightly differ from web page searches.",
+                BiopythonWarning,
+            )
+
+    # Format the "Put" command, which sends search requests to qblast.
+    # Parameters taken from http://www.ncbi.nlm.nih.gov/BLAST/Doc/node5.html on 9 July 2007
+    # Additional parameters are taken from http://www.ncbi.nlm.nih.gov/BLAST/Doc/node9.html on 8 Oct 2010
+    # To perform a PSI-BLAST or PHI-BLAST search the service ("Put" and "Get" commands) must be specified
+    # (e.g. psi_blast = NCBIWWW.qblast("blastp", "refseq_protein", input_sequence, service="psi"))
+    parameters = [
+        ("AUTO_FORMAT", auto_format),
+        ("COMPOSITION_BASED_STATISTICS", composition_based_statistics),
+        ("DATABASE", database),
+        ("DB_GENETIC_CODE", db_genetic_code),
+        ("ENDPOINTS", endpoints),
+        ("ENTREZ_QUERY", entrez_query),
+        ("EXPECT", expect),
+        ("FILTER", filter),
+        ("GAPCOSTS", gapcosts),
+        ("GENETIC_CODE", genetic_code),
+        ("HITLIST_SIZE", hitlist_size),
+        ("I_THRESH", i_thresh),
+        ("LAYOUT", layout),
+        ("LCASE_MASK", lcase_mask),
+        ("MEGABLAST", megablast),
+        ("MATRIX_NAME", matrix_name),
+        ("NUCL_PENALTY", nucl_penalty),
+        ("NUCL_REWARD", nucl_reward),
+        ("OTHER_ADVANCED", other_advanced),
+        ("PERC_IDENT", perc_ident),
+        ("PHI_PATTERN", phi_pattern),
+        ("PROGRAM", program),
+        # ('PSSM', pssm), - Is it possible to use PSI-BLAST via this API?
+        ("QUERY", sequence),
+        ("QUERY_FILE", query_file),
+        ("QUERY_BELIEVE_DEFLINE", query_believe_defline),
+        ("QUERY_FROM", query_from),
+        ("QUERY_TO", query_to),
+        # ('RESULTS_FILE', ...), - Can we use this parameter?
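+        # Note that ('SERVICE', service) below selects the search service
+        # (plain/psi/phi/megablast, see the docstring above); the same value
+        # is sent again with the "Get" command further down.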
+ ("SEARCHSP_EFF", searchsp_eff), + ("SERVICE", service), + ("SHORT_QUERY_ADJUST", short_query), + ("TEMPLATE_TYPE", template_type), + ("TEMPLATE_LENGTH", template_length), + ("THRESHOLD", threshold), + ("UNGAPPED_ALIGNMENT", ungapped_alignment), + ("WORD_SIZE", word_size), + ("CMD", "Put"), + ] + query = [x for x in parameters if x[1] is not None] + message = urlencode(query).encode() + + # Send off the initial query to qblast. + # Note the NCBI do not currently impose a rate limit here, other + # than the request not to make say 50 queries at once using multiple + # threads. + request = Request(url_base, message, {"User-Agent": "BiopythonClient"}) + handle = urlopen(request) + + # Format the "Get" command, which gets the formatted results from qblast + # Parameters taken from http://www.ncbi.nlm.nih.gov/BLAST/Doc/node6.html on 9 July 2007 + rid, rtoe = _parse_qblast_ref_page(handle) + parameters = [ + ("ALIGNMENTS", alignments), + ("ALIGNMENT_VIEW", alignment_view), + ("DESCRIPTIONS", descriptions), + ("ENTREZ_LINKS_NEW_WINDOW", entrez_links_new_window), + ("EXPECT_LOW", expect_low), + ("EXPECT_HIGH", expect_high), + ("FORMAT_ENTREZ_QUERY", format_entrez_query), + ("FORMAT_OBJECT", format_object), + ("FORMAT_TYPE", format_type), + ("NCBI_GI", ncbi_gi), + ("RID", rid), + ("RESULTS_FILE", results_file), + ("SERVICE", service), + ("SHOW_OVERVIEW", show_overview), + ("CMD", "Get"), + ] + query = [x for x in parameters if x[1] is not None] + message = urlencode(query).encode() + + # Poll NCBI until the results are ready. + # https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=BlastDocs&DOC_TYPE=DeveloperInfo + # 1. Do not contact the server more often than once every 10 seconds. + # 2. Do not poll for any single RID more often than once a minute. + # 3. Use the URL parameter email and tool, so that the NCBI + # can contact you if there is a problem. + # 4. Run scripts weekends or between 9 pm and 5 am Eastern time + # on weekdays if more than 50 searches will be submitted. + # -- + # Could start with a 10s delay, but expect most short queries + # will take longer thus at least 70s with delay. Therefore, + # start with 20s delay, thereafter once a minute. + delay = 20 # seconds + while True: + current = time.time() + wait = qblast._previous + delay - current + if wait > 0: + time.sleep(wait) + qblast._previous = current + wait + else: + qblast._previous = current + # delay by at least 60 seconds only if running the request against the public NCBI API + if delay < 60 and url_base == NCBI_BLAST_URL: + # Wasn't a quick return, must wait at least a minute + delay = 60 + + request = Request(url_base, message, {"User-Agent": "BiopythonClient"}) + handle = urlopen(request) + results = handle.read().decode() + + # Can see an "\n\n" page while results are in progress, + # if so just wait a bit longer... + if results == "\n\n": + continue + # XML results don't have the Status tag when finished + if "Status=" not in results: + break + i = results.index("Status=") + j = results.index("\n", i) + status = results[i + len("Status=") : j].strip() + if status.upper() == "READY": + break + return StringIO(results) + + +qblast._previous = 0 + + +def _parse_qblast_ref_page(handle): + """Extract a tuple of RID, RTOE from the 'please wait' page (PRIVATE). + + The NCBI FAQ pages use TOE for 'Time of Execution', so RTOE is probably + 'Request Time of Execution' and RID would be 'Request Identifier'. 
+    """
+    s = handle.read().decode()
+    i = s.find("RID =")
+    if i == -1:
+        rid = None
+    else:
+        j = s.find("\n", i)
+        rid = s[i + len("RID =") : j].strip()
+
+    i = s.find("RTOE =")
+    if i == -1:
+        rtoe = None
+    else:
+        j = s.find("\n", i)
+        rtoe = s[i + len("RTOE =") : j].strip()
+
+    if not rid and not rtoe:
+        # Can we reliably extract the error message from the HTML page?
+        # e.g. "Message ID#24 Error: Failed to read the Blast query:
+        #      Nucleotide FASTA provided for protein sequence"
+        # or "Message ID#32 Error: Query contains no data: Query
+        #    contains no sequence data"
+        #
+        # This used to occur inside a <div class="error msInf"> entry:
+        i = s.find('<div class="error msInf">')
+        if i != -1:
+            msg = s[i + len('<div class="error msInf">') :].strip()
+            msg = msg.split("</div>", 1)[0].split("\n", 1)[0].strip()
+            if msg:
+                raise ValueError("Error message from NCBI: %s" % msg)
+        # In spring 2010 the markup was like this:
+        i = s.find('<p class="error">')
+        if i != -1:
+            msg = s[i + len('<p class="error">') :].strip()
+            msg = msg.split("</p>", 1)[0].split("\n", 1)[0].strip()
+            if msg:
+                raise ValueError("Error message from NCBI: %s" % msg)
+        # Generic search based on the way the error messages start:
+        i = s.find("Message ID#")
+        if i != -1:
+            # Break the message at the first HTML tag
+            msg = s[i:].split("<", 1)[0].split("\n", 1)[0].strip()
+            raise ValueError("Error message from NCBI: %s" % msg)
+        # We didn't recognise the error layout :(
+        # print(s)
+        raise ValueError(
+            "No RID and no RTOE found in the 'please wait' page, "
+            "there was probably an error in your request but we "
+            "could not extract a helpful error message."
+        )
+    elif not rid:
+        # Can this happen?
+        raise ValueError(
+            "No RID found in the 'please wait' page. (although RTOE = %r)" % rtoe
+        )
+    elif not rtoe:
+        # Can this happen?
+        raise ValueError(
+            "No RTOE found in the 'please wait' page. (although RID = %r)" % rid
+        )
+
+    try:
+        return rid, int(rtoe)
+    except ValueError:
+        raise ValueError(
+            "A non-integer RTOE found in the 'please wait' page, %r" % rtoe
+        ) from None
diff --git a/code/lib/Bio/Blast/NCBIXML.py b/code/lib/Bio/Blast/NCBIXML.py
new file mode 100644
index 0000000..90e91a9
--- /dev/null
+++ b/code/lib/Bio/Blast/NCBIXML.py
@@ -0,0 +1,864 @@
+# Copyright 2000 by Bertrand Frottier. All rights reserved.
+# Revisions 2005-2006 copyright Michiel de Hoon
+# Revisions 2006-2009 copyright Peter Cock
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Code to work with the BLAST XML output.
+
+The BLAST XML DTD file is on the NCBI FTP site at:
+ftp://ftp.ncbi.nlm.nih.gov/blast/documents/xml/NCBI_BlastOutput.dtd
+"""
+
+from Bio.Blast import Record
+import xml.sax
+from xml.sax.handler import ContentHandler
+
+
+class _XMLparser(ContentHandler):
+    """Generic SAX Parser (PRIVATE).
+
+    Just a very basic SAX parser.
+
+    Redefine the methods startElement, characters and endElement.
+    """
+
+    def __init__(self, debug=0):
+        """Initialize the parser.
+
+        Arguments:
+        - debug - integer, amount of debug information to print
+
+        """
+        self._tag = []
+        self._value = ""
+        self._debug = debug
+        self._debug_ignore_list = []
+        self._method_name_level = 1
+        self._method_map = None
+
+    def startElement(self, name, attr):
+        """Found XML start tag.
+
+        No real need of attr, BLAST DTD doesn't use them
+
+        Arguments:
+        - name -- name of the tag
+        - attr -- tag attributes
+
+        """
+        self._tag.append(name)
+
+        if len(self._tag) == 1:
+            # root node
+            self._on_root_node(name)
+            return
+
+        # Try to call a method (defined in subclasses)
+        method = "start_" + self._node_method_name(name)
+
+        # Note could use try / except AttributeError
+        # BUT I found often triggered by nested errors...
+        if method in self._method_map:
+            self._method_map[method]()
+            if self._debug > 4:
+                print("NCBIXML: Parsed: " + method)
+        elif self._debug > 3:
+            # Doesn't exist (yet) and may want to warn about it
+            if method not in self._debug_ignore_list:
+                print("NCBIXML: Ignored: " + method)
+                self._debug_ignore_list.append(method)
+
+        # We don't care about white space in parent tags like Hsp,
+        # but that white space doesn't belong to child tags like Hsp_midline
+        if self._value.strip():
+            raise ValueError(
+                "What should we do with %s before the %r tag?" % (self._value, name)
+            )
+        self._value = ""
+
+    def characters(self, ch):
+        """Found some text.
+ + Arguments: + - ch -- characters read + + """ + self._value += ch # You don't ever get the whole string + + def endElement(self, name): + """Found XML end tag. + + Arguments: + - name -- tag name + + """ + # DON'T strip any white space, we may need it e.g. the hsp-midline + + # Try to call a method (defined in subclasses) + method = "end_" + self._node_method_name(name) + + # Note could use try / except AttributeError + # BUT I found often triggered by nested errors... + if method in self._method_map: + self._method_map[method]() + if self._debug > 2: + print("NCBIXML: Parsed: %s %s" % (method, self._value)) + elif self._debug > 1: + # Doesn't exist (yet) and may want to warn about it + if method not in self._debug_ignore_list: + print("NCBIXML: Ignored: %s %s" % (method, self._value)) + self._debug_ignore_list.append(method) + + # Reset character buffer + self._value = "" + + self._tag.pop() + + def _node_method_name(self, name): + if self._method_name_level == 1: + return name + return "/".join(self._tag[-self._method_name_level :]) + + +class BlastParser(_XMLparser): + """Parse XML BLAST data into a Record.Blast object. + + Parses XML output from BLAST (direct use discouraged). + This (now) returns a list of Blast records. + Historically it returned a single Blast record. + You are expected to use this via the parse or read functions. + + All XML 'action' methods are private methods and may be: + + - ``_start_TAG`` called when the start tag is found + - ``_end_TAG`` called when the end tag is found + + """ + + def __init__(self, debug=0): + """Initialize the parser. + + Arguments: + - debug - integer, amount of debug information to print + + """ + # Calling superclass method + _XMLparser.__init__(self, debug) + + self._parser = xml.sax.make_parser() + self._parser.setContentHandler(self) + + # To avoid ValueError: unknown url type: NCBI_BlastOutput.dtd + self._parser.setFeature(xml.sax.handler.feature_validation, 0) + self._parser.setFeature(xml.sax.handler.feature_namespaces, 0) + self._parser.setFeature(xml.sax.handler.feature_external_pes, 0) + self._parser.setFeature(xml.sax.handler.feature_external_ges, 0) + + self._xml_version = 1 + + self.reset() + + def reset(self): + """Reset all the data allowing reuse of the BlastParser() object.""" + self._records = [] + self._header = Record.Header() + self._parameters = Record.Parameters() + self._parameters.filter = None # Maybe I should update the class? + + def _on_root_node(self, name): + if name == "BlastOutput": + self._setup_blast_v1() + elif name == "BlastXML2": + self._setup_blast_v2() + else: + raise ValueError( + "Invalid root node name: %s. 
Root node should be either" + " BlastOutput or BlastXML2" % name + ) + + def _setup_blast_v1(self): + self._method_map = { + "start_Iteration": self._start_blast_record, + "end_Iteration": self._end_blast_record, + "end_BlastOutput_program": self._set_header_application, + "end_BlastOutput_version": self._set_header_version, + "end_BlastOutput_reference": self._set_header_reference, + "end_BlastOutput_db": self._set_header_database, + "end_BlastOutput_query-ID": self._set_header_query_id, + "end_BlastOutput_query-def": self._set_header_query, + "end_BlastOutput_query-len": self._set_header_query_letters, + "end_Iteration_query-ID": self._set_record_query_id, + "end_Iteration_query-def": self._set_record_query_def, + "end_Iteration_query-len": self._set_record_query_letters, + "end_BlastOutput_hits": self._set_record_hits, + "end_Parameters_matrix": self._set_parameters_matrix, + "end_Parameters_expect": self._set_parameters_expect, + "end_Parameters_sc-match": self._set_parameters_sc_match, + "end_Parameters_sc-mismatch": self._set_parameters_sc_mismatch, + "end_Parameters_gap-open": self._set_parameters_gap_penalties, + "end_Parameters_gap-extend": self._set_parameters_gap_extend, + "end_Parameters_filter": self._set_parameters_filter, + "start_Hit": self._start_hit, + "end_Hit": self._end_hit, + "end_Hit_id": self.set_hit_id, + "end_Hit_def": self.set_hit_def, + "end_Hit_accession": self.set_hit_accession, + "end_Hit_len": self.set_hit_len, + "start_Hsp": self._start_hsp, + "end_Hsp_score": self._set_hsp_score, + "end_Hsp_bit-score": self._set_hsp_bit_score, + "end_Hsp_evalue": self._set_hsp_e_value, + "end_Hsp_query-from": self._set_hsp_query_start, + "end_Hsp_query-to": self._set_hsp_query_end, + "end_Hsp_hit-from": self._set_hsp_hit_from, + "end_Hsp_hit-to": self._set_hsp_hit_to, + "end_Hsp_query-frame": self._set_hsp_query_frame, + "end_Hsp_hit-frame": self._set_hsp_hit_frame, + "end_Hsp_identity": self._set_hsp_identity, + "end_Hsp_positive": self._set_hsp_positive, + "end_Hsp_gaps": self._set_hsp_gaps, + "end_Hsp_align-len": self._set_hsp_align_len, + "end_Hsp_qseq": self._set_hsp_query_seq, + "end_Hsp_hseq": self._set_hsp_subject_seq, + "end_Hsp_midline": self._set_hsp_midline, + "end_Statistics_db-num": self._set_statistics_db_num, + "end_Statistics_db-len": self._set_statistics_db_len, + "end_Statistics_hsp-len": self._set_statistics_hsp_len, + "end_Statistics_eff-space": self._set_statistics_eff_space, + "end_Statistics_kappa": self._set_statistics_kappa, + "end_Statistics_lambda": self._set_statistics_lambda, + "end_Statistics_entropy": self._set_statistics_entropy, + } + + def _setup_blast_v2(self): + self._method_name_level = 2 + self._xml_version = 2 + self._method_map = { + "start_report/Report": self._start_blast_record, + "end_report/Report": self._end_blast_record, + "end_Report/program": self._set_header_application, + "end_Report/version": self._set_header_version, + "end_Report/reference": self._set_header_reference, + "end_Target/db": self._set_header_database, + "end_Search/query-id": self._set_record_query_id, + "end_Search/query-title": self._set_record_query_def, + "end_Search/query-len": self._set_record_query_letters, + "end_BlastOutput_hits": self._set_record_hits, + "end_Parameters/matrix": self._set_parameters_matrix, + "end_Parameters/expect": self._set_parameters_expect, + "end_Parameters/sc-match": self._set_parameters_sc_match, + "end_Parameters/sc-mismatch": self._set_parameters_sc_mismatch, + "end_Parameters/gap-open": 
self._set_parameters_gap_penalties, + "end_Parameters/gap-extend": self._set_parameters_gap_extend, + "end_Parameters/filter": self._set_parameters_filter, + "start_hits/Hit": self._start_hit, + "end_hits/Hit": self._end_hit, + "start_description/HitDescr": self._start_hit_descr_item, + "end_description/HitDescr": self._end_hit_descr_item, + "end_HitDescr/id": self._end_description_id, + "end_HitDescr/accession": self._end_description_accession, + "end_HitDescr/title": self._end_description_title, + "end_HitDescr/taxid": self._end_description_taxid, + "end_HitDescr/sciname": self._end_description_sciname, + "end_Hit/len": self.set_hit_len, + "start_hsps/Hsp": self._start_hsp, + "end_hsps/Hsp": self._end_hsp, + "end_Hsp/score": self._set_hsp_score, + "end_Hsp/bit-score": self._set_hsp_bit_score, + "end_Hsp/evalue": self._set_hsp_e_value, + "end_Hsp/query-from": self._set_hsp_query_start, + "end_Hsp/query-to": self._set_hsp_query_end, + "end_Hsp/hit-from": self._set_hsp_hit_from, + "end_Hsp/hit-to": self._set_hsp_hit_to, + "end_Hsp/query-frame": self._set_hsp_query_frame, + "end_Hsp/hit-frame": self._set_hsp_hit_frame, + "end_Hsp/query-strand": self._set_hsp_query_strand, + "end_Hsp/hit-strand": self._set_hsp_hit_strand, + "end_Hsp/identity": self._set_hsp_identity, + "end_Hsp/positive": self._set_hsp_positive, + "end_Hsp/gaps": self._set_hsp_gaps, + "end_Hsp/align-len": self._set_hsp_align_len, + "end_Hsp/qseq": self._set_hsp_query_seq, + "end_Hsp/hseq": self._set_hsp_subject_seq, + "end_Hsp/midline": self._set_hsp_midline, + "end_Statistics/db-num": self._set_statistics_db_num, + "end_Statistics/db-len": self._set_statistics_db_len, + "end_Statistics/hsp-len": self._set_statistics_hsp_len, + "end_Statistics/eff-space": self._set_statistics_eff_space, + "end_Statistics/kappa": self._set_statistics_kappa, + "end_Statistics/lambda": self._set_statistics_lambda, + "end_Statistics/entropy": self._set_statistics_entropy, + } + + def _start_blast_record(self): + """Start interaction (PRIVATE).""" + self._blast = Record.Blast() + + def _end_blast_record(self): + """End interaction (PRIVATE).""" + # We stored a lot of generic "top level" information + # in self._header (an object of type Record.Header) + self._blast.reference = self._header.reference + self._blast.date = self._header.date + self._blast.version = self._header.version + self._blast.database = self._header.database + self._blast.application = self._header.application + + # These are required for "old" pre 2.2.14 files + # where only , + # and were used. Now they + # are supplemented/replaced by , + # and + if not hasattr(self._blast, "query") or not self._blast.query: + self._blast.query = self._header.query + if not hasattr(self._blast, "query_id") or not self._blast.query_id: + self._blast.query_id = self._header.query_id + if not hasattr(self._blast, "query_letters") or not self._blast.query_letters: + self._blast.query_letters = self._header.query_letters + + # Hack to record the query length as both the query_letters and + # query_length properties (as in the plain text parser, see + # Bug 2176 comment 12): + self._blast.query_length = self._blast.query_letters + # Perhaps in the long term we should deprecate one, but I would + # prefer to drop query_letters - so we need a transition period + # with both. + + # Hack to record the claimed database size as database_length + # (as well as in num_letters_in_database, see Bug 2176 comment 13): + self._blast.database_length = self._blast.num_letters_in_database + # TODO? 
Deprecate database_letters next? + + # Hack to record the claimed database sequence count as database_sequences + self._blast.database_sequences = self._blast.num_sequences_in_database + + # Apply the "top level" parameter information + self._blast.matrix = self._parameters.matrix + self._blast.num_seqs_better_e = self._parameters.num_seqs_better_e + self._blast.gap_penalties = self._parameters.gap_penalties + self._blast.filter = self._parameters.filter + self._blast.expect = self._parameters.expect + self._blast.sc_match = self._parameters.sc_match + self._blast.sc_mismatch = self._parameters.sc_mismatch + + # Add to the list + self._records.append(self._blast) + # Clear the object (a new empty one is create in _start_Iteration) + self._blast = None + + if self._debug: + print("NCBIXML: Added Blast record to results") + + # Header + def _set_header_application(self): + """BLAST program, e.g., blastp, blastn, etc. (PRIVATE). + + Save this to put on each blast record object + """ + self._header.application = self._value.upper() + + def _set_header_version(self): + """Version number and date of the BLAST engine (PRIVATE). + + e.g. "BLASTX 2.2.12 [Aug-07-2005]" but there can also be + variants like "BLASTP 2.2.18+" without the date. + + Save this to put on each blast record object + """ + parts = self._value.split() + # TODO - Check the first word starts with BLAST? + + # The version is the second word (field one) + self._header.version = parts[1] + + # Check there is a third word (the date) + if len(parts) >= 3: + if parts[2][0] == "[" and parts[2][-1] == "]": + self._header.date = parts[2][1:-1] + else: + # Assume this is still a date, but without the + # square brackets + self._header.date = parts[2] + + def _set_header_reference(self): + """Record any article reference describing the algorithm (PRIVATE). + + Save this to put on each blast record object + """ + self._header.reference = self._value + + def _set_header_database(self): + """Record the database(s) searched (PRIVATE). + + Save this to put on each blast record object + """ + self._header.database = self._value + + def _set_header_query_id(self): + """Record the identifier of the query (PRIVATE). + + Important in old pre 2.2.14 BLAST, for recent versions + is enough + """ + self._header.query_id = self._value + + def _set_header_query(self): + """Record the definition line of the query (PRIVATE). + + Important in old pre 2.2.14 BLAST, for recent versions + is enough + """ + self._header.query = self._value + + def _set_header_query_letters(self): + """Record the length of the query (PRIVATE). + + Important in old pre 2.2.14 BLAST, for recent versions + is enough + """ + self._header.query_letters = int(self._value) + + def _set_record_query_id(self): + """Record the identifier of the query (PRIVATE).""" + self._blast.query_id = self._value + + def _set_record_query_def(self): + """Record the definition line of the query (PRIVATE).""" + self._blast.query = self._value + + def _set_record_query_letters(self): + """Record the length of the query (PRIVATE).""" + self._blast.query_letters = int(self._value) + + # def _end_BlastOutput_query_seq(self): + # """The query sequence (PRIVATE).""" + # pass # XXX Missing in Record.Blast ? 
+
+    # def _end_BlastOutput_iter_num(self):
+    #     """The psi-blast iteration number (PRIVATE)."""
+    #     pass # XXX TODO PSI
+
+    def _set_record_hits(self):
+        """Hits to the database sequences, one for every sequence (PRIVATE)."""
+        self._blast.num_hits = int(self._value)
+
+    # def _end_BlastOutput_message(self):
+    #     """error messages (PRIVATE)."""
+    #     pass # XXX What to do ?
+
+    # Parameters
+    def _set_parameters_matrix(self):
+        """Matrix used (-M on legacy BLAST) (PRIVATE)."""
+        self._parameters.matrix = self._value
+
+    def _set_parameters_expect(self):
+        """Expect value cutoff (PRIVATE)."""
+        # NOTE: In old text output there was a line:
+        # Number of sequences better than 1.0e-004: 1
+        # As far as I can see, parameters.num_seqs_better_e
+        # would take the value of 1, and the expectation
+        # value was not recorded.
+        #
+        # Anyway we should NOT record this against num_seqs_better_e
+        self._parameters.expect = self._value
+
+    # def _end_Parameters_include(self):
+    #     """Inclusion threshold for a psi-blast iteration (-h) (PRIVATE)."""
+    #     pass # XXX TODO PSI
+
+    def _set_parameters_sc_match(self):
+        """Match score for nucleotide-nucleotide comparison (-r) (PRIVATE)."""
+        self._parameters.sc_match = int(self._value)
+
+    def _set_parameters_sc_mismatch(self):
+        """Mismatch penalty for nucleotide-nucleotide comparison (-q) (PRIVATE)."""
+        self._parameters.sc_mismatch = int(self._value)
+
+    def _set_parameters_gap_penalties(self):
+        """Gap existence cost (-G) (PRIVATE)."""
+        self._parameters.gap_penalties = int(self._value)
+
+    def _set_parameters_gap_extend(self):
+        """Gap extension cost (-E) (PRIVATE)."""
+        self._parameters.gap_penalties = (
+            self._parameters.gap_penalties,
+            int(self._value),
+        )
+
+    def _set_parameters_filter(self):
+        """Record filtering options (-F) (PRIVATE)."""
+        self._parameters.filter = self._value
+
+    # def _end_Parameters_pattern(self):
+    #     """Pattern used for phi-blast search (PRIVATE).
+    #     """
+    #     pass # XXX TODO PSI
+
+    # def _end_Parameters_entrez_query(self):
+    #     """Entrez query used to limit search (PRIVATE).
+ # """ + # pass # XXX TODO PSI + + # Hits + def _start_hit(self): + """Start filling records (PRIVATE).""" + self._blast.alignments.append(Record.Alignment()) + self._descr = ( + Record.Description() if self._xml_version == 1 else Record.DescriptionExt() + ) + self._blast.descriptions.append(self._descr) + self._blast.multiple_alignment = [] + self._hit = self._blast.alignments[-1] + + self._descr.num_alignments = 0 + + def _end_hit(self): + """Clear variables (PRIVATE).""" + # Cleanup + self._blast.multiple_alignment = None + self._hit = None + self._descr = None + + def set_hit_id(self): + """Record the identifier of the database sequence (PRIVATE).""" + self._hit.hit_id = self._value + self._hit.title = self._value + " " + + def set_hit_def(self): + """Record the definition line of the database sequence (PRIVATE).""" + self._hit.hit_def = self._value + self._hit.title += self._value + self._descr.title = self._hit.title + + def set_hit_accession(self): + """Record the accession value of the database sequence (PRIVATE).""" + self._hit.accession = self._value + self._descr.accession = self._value + + def set_hit_len(self): + """Record the length of the hit.""" + self._hit.length = int(self._value) + + # HSPs + def _start_hsp(self): + # Note that self._start_Hit() should have been called + # to setup things like self._blast.multiple_alignment + self._hsp = Record.HSP() + self._hsp.positives = None + self._hit.hsps.append(self._hsp) + self._descr.num_alignments += 1 + self._blast.multiple_alignment.append(Record.MultipleAlignment()) + self._mult_al = self._blast.multiple_alignment[-1] + + def _end_hsp(self): + if self._hsp.frame and len(self._hsp.frame) == 1: + self._hsp.frame += (0,) + + # Hsp_num is useless + def _set_hsp_score(self): + """Record the raw score of HSP (PRIVATE).""" + self._hsp.score = float(self._value) + if self._descr.score is None: + self._descr.score = float(self._value) + + def _set_hsp_bit_score(self): + """Record the Bit score of HSP (PRIVATE).""" + self._hsp.bits = float(self._value) + if self._descr.bits is None: + self._descr.bits = float(self._value) + + def _set_hsp_e_value(self): + """Record the expect value of the HSP (PRIVATE).""" + self._hsp.expect = float(self._value) + if self._descr.e is None: + self._descr.e = float(self._value) + + def _set_hsp_query_start(self): + """Offset of query at the start of the alignment (one-offset) (PRIVATE).""" + self._hsp.query_start = int(self._value) + + def _set_hsp_query_end(self): + """Offset of query at the end of the alignment (one-offset) (PRIVATE).""" + self._hsp.query_end = int(self._value) + + def _set_hsp_hit_from(self): + """Offset of the database at the start of the alignment (one-offset) (PRIVATE).""" + self._hsp.sbjct_start = int(self._value) + + def _set_hsp_hit_to(self): + """Offset of the database at the end of the alignment (one-offset) (PRIVATE).""" + self._hsp.sbjct_end = int(self._value) + + # def _end_Hsp_pattern_from(self): + # """Start of phi-blast pattern on the query (one-offset) (PRIVATE).""" + # pass # XXX TODO PSI + + # def _end_Hsp_pattern_to(self): + # """End of phi-blast pattern on the query (one-offset) (PRIVATE).""" + # pass # XXX TODO PSI + + def _set_hsp_query_frame(self): + """Frame of the query if applicable (PRIVATE).""" + v = int(self._value) + self._hsp.frame = (v,) + if self._header.application == "BLASTN": + self._hsp.strand = ("Plus" if v > 0 else "Minus",) + + def _set_hsp_hit_frame(self): + """Frame of the database sequence if applicable (PRIVATE).""" + v = int(self._value) 
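+        # The query frame (if any) was stored first by _set_hsp_query_frame;
+        # append the hit frame as the second element of the tuple, padding
+        # with 0 when no query frame was parsed.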
+        if len(self._hsp.frame) == 0:
+            self._hsp.frame = (0, v)
+        else:
+            self._hsp.frame += (v,)
+        if self._header.application == "BLASTN":
+            self._hsp.strand += ("Plus" if v > 0 else "Minus",)
+
+    def _set_hsp_query_strand(self):
+        """Strand of the query if applicable (PRIVATE)."""
+        self._hsp.strand = (self._value,)
+        if self._header.application == "BLASTN":
+            self._hsp.frame = (1 if self._value == "Plus" else -1,)
+
+    def _set_hsp_hit_strand(self):
+        """Strand of the database sequence if applicable (PRIVATE)."""
+        self._hsp.strand += (self._value,)
+        if self._header.application == "BLASTN":
+            self._hsp.frame += (1 if self._value == "Plus" else -1,)
+
+    def _set_hsp_identity(self):
+        """Record the number of identities in the alignment (PRIVATE)."""
+        v = int(self._value)
+        self._hsp.identities = v
+        if self._hsp.positives is None:
+            self._hsp.positives = v
+
+    def _set_hsp_positive(self):
+        """Record the number of positive (conservative) substitutions in the alignment (PRIVATE)."""
+        self._hsp.positives = int(self._value)
+
+    def _set_hsp_gaps(self):
+        """Record the number of gaps in the alignment (PRIVATE)."""
+        self._hsp.gaps = int(self._value)
+
+    def _set_hsp_align_len(self):
+        """Record the length of the alignment (PRIVATE)."""
+        self._hsp.align_length = int(self._value)
+
+    # def _end_Hsp_density(self):
+    #     """Score density (PRIVATE)."""
+    #     pass # XXX ???
+
+    def _set_hsp_query_seq(self):
+        """Record the alignment string for the query (PRIVATE)."""
+        self._hsp.query = self._value
+
+    def _set_hsp_subject_seq(self):
+        """Record the alignment string for the database (PRIVATE)."""
+        self._hsp.sbjct = self._value
+
+    def _set_hsp_midline(self):
+        """Record the middle line as normally seen in BLAST report (PRIVATE)."""
+        self._hsp.match = self._value  # do NOT strip spaces!
+        assert len(self._hsp.match) == len(self._hsp.query)
+        assert len(self._hsp.match) == len(self._hsp.sbjct)
+
+    # Statistics
+    def _set_statistics_db_num(self):
+        """Record the number of sequences in the database (PRIVATE)."""
+        self._blast.num_sequences_in_database = int(self._value)
+
+    def _set_statistics_db_len(self):
+        """Record the number of letters in the database (PRIVATE)."""
+        self._blast.num_letters_in_database = int(self._value)
+
+    def _set_statistics_hsp_len(self):
+        """Record the effective HSP length (PRIVATE)."""
+        self._blast.effective_hsp_length = int(self._value)
+
+    def _set_statistics_eff_space(self):
+        """Record the effective search space (PRIVATE)."""
+        self._blast.effective_search_space = float(self._value)
+
+    def _set_statistics_kappa(self):
+        """Karlin-Altschul parameter K (PRIVATE)."""
+        self._blast.ka_params = float(self._value)
+
+    def _set_statistics_lambda(self):
+        """Karlin-Altschul parameter Lambda (PRIVATE)."""
+        self._blast.ka_params = (float(self._value), self._blast.ka_params)
+
+    def _set_statistics_entropy(self):
+        """Karlin-Altschul parameter H (PRIVATE)."""
+        self._blast.ka_params = self._blast.ka_params + (float(self._value),)
+
+    def _start_hit_descr_item(self):
+        """XML v2. Start hit description item."""
+        self._hit_descr_item = Record.DescriptionExtItem()
+
+    def _end_hit_descr_item(self):
+        """XML v2. End hit description item."""
+        self._descr.append_item(self._hit_descr_item)
+        if not self._hit.title:
+            self._hit.title = str(self._hit_descr_item)
+        self._hit_descr_item = None
+
+    def _end_description_id(self):
+        """XML v2. The identifier of the database sequence (PRIVATE)."""
+        self._hit_descr_item.id = self._value
+        if not self._hit.hit_id:
+            self._hit.hit_id = self._value
+
+    def _end_description_accession(self):
+        """XML v2. The accession value of the database sequence (PRIVATE)."""
+        self._hit_descr_item.accession = self._value
+        if not getattr(self._hit, "accession", None):
+            self._hit.accession = self._value
+
+    def _end_description_title(self):
+        """XML v2. The hit description title (PRIVATE)."""
+        self._hit_descr_item.title = self._value
+
+    def _end_description_taxid(self):
+        try:
+            self._hit_descr_item.taxid = int(self._value)
+        except ValueError:
+            pass
+
+    def _end_description_sciname(self):
+        self._hit_descr_item.sciname = self._value
+
+
+def read(handle, debug=0):
+    """Return a single Blast record (assumes just one query).
+
+    Uses the BlastParser internally.
+
+    This function is for use when there is one and only one BLAST
+    result in your XML file.
+
+    Use the Bio.Blast.NCBIXML.parse() function if you expect more than
+    one BLAST record (i.e. if you have more than one query sequence).
+    """
+    iterator = parse(handle, debug)
+    try:
+        record = next(iterator)
+    except StopIteration:
+        raise ValueError("No records found in handle") from None
+    try:
+        next(iterator)
+        raise ValueError("More than one record found in handle")
+    except StopIteration:
+        pass
+    return record
+
+
+def parse(handle, debug=0):
+    """Return an iterator of Blast records, one for each query.
+
+    Incremental parser, this is an iterator that returns
+    Blast records. It uses the BlastParser internally.
+
+    handle - file handle to an XML file to parse
+    debug - integer, amount of debug information to print
+
+    This is a generator function that returns multiple Blast record
+    objects - one for each query sequence given to blast. The file
+    is read incrementally, returning complete records as they are read
+    in.
+
+    Should cope with new BLAST 2.2.14+ which gives a single XML file
+    for multiple query records.
+
+    Should also cope with XML output from older versions of BLAST which
+    gave multiple XML files concatenated together (giving a single file
+    which strictly speaking wasn't valid XML).
+    """
+    from xml.parsers import expat
+
+    BLOCK = 1024
+    MARGIN = 10  # must be at least length of newline + XML start
+    XML_START = "<?xml"
diff --git a/code/lib/Bio/Blast/Record.py b/code/lib/Bio/Blast/Record.py
new file mode 100644
--- /dev/null
+++ b/code/lib/Bio/Blast/Record.py
+"""Record classes to hold BLAST output."""
+
+from Bio.Align import MultipleSeqAlignment
+from Bio.Seq import Seq
+from Bio.SeqRecord import SeqRecord
+
+
+def fmt_(value, format_spec, default_str="<unknown>"):
+    """Ensure the given value formats to a string correctly."""
+    if value is None:
+        return default_str
+    return format_spec % value
+
+
+class Header:
+    """Saves information from a blast header.
+
+    Members:
+    application         The name of the BLAST flavor that generated this data.
+    version             Version of blast used.
+    date                Date this data was generated.
+    reference           Reference for blast.
+
+    query               Name of query sequence.
+    query_letters       Number of letters in the query sequence. (int)
+
+    database            Name of the database.
+    database_sequences  Number of sequences in the database. (int)
+    database_letters    Number of letters in the database. (int)
+
+    """
+
+    def __init__(self):
+        """Initialize the class."""
+        self.application = ""
+        self.version = ""
+        self.date = ""
+        self.reference = ""
+
+        self.query = ""
+        self.query_letters = None
+
+        self.database = ""
+        self.database_sequences = None
+        self.database_letters = None
+
+
+class Description:
+    """Stores information about one hit in the descriptions section.
+
+    Members:
+    title           Title of the hit.
+    score           Number of bits. (int)
+    bits            Bit score. (float)
+    e               E value. (float)
+    num_alignments  Number of alignments for the same subject.
(int) + """ + + def __init__(self): + """Initialize the class.""" + self.title = "" + self.score = None + self.bits = None + self.e = None + self.num_alignments = None + + def __str__(self): + """Return the description as a string.""" + return "%-66s %5s %s" % (self.title, self.score, self.e) + + +class DescriptionExt(Description): + """Extended description record for BLASTXML version 2. + + Members: + items List of DescriptionExtItem + """ + + def __init__(self): + """Initialize the class.""" + super().__init__() + + self.items = [] + + def append_item(self, item): + """Add a description extended record.""" + if len(self.items) == 0: + self.title = str(item) + self.items.append(item) + + +class DescriptionExtItem: + """Stores information about one record in hit description for BLASTXML version 2. + + Members: + id Database identifier + title Title of the hit. + """ + + def __init__(self): + """Initialize the class.""" + self.id = None + self.title = None + self.accession = None + self.taxid = None + self.sciname = None + + def __str__(self): + """Return the description identifier and title as a string.""" + return "%s %s" % (self.id, self.title) + + +class Alignment: + """Stores information about one hit in the alignments section. + + Members: + title Name. + hit_id Hit identifier. (str) + hit_def Hit definition. (str) + length Length. (int) + hsps A list of HSP objects. + + """ + + def __init__(self): + """Initialize the class.""" + self.title = "" + self.hit_id = "" + self.hit_def = "" + self.length = None + self.hsps = [] + + def __str__(self): + """Return the BLAST alignment as a formatted string.""" + lines = self.title.split("\n") + lines.append("Length = %s\n" % self.length) + return "\n ".join(lines) + + +class HSP: + """Stores information about one hsp in an alignment hit. + + Members: + - score BLAST score of hit. (float) + - bits Number of bits for that score. (float) + - expect Expect value. (float) + - num_alignments Number of alignments for same subject. (int) + - identities Number of identities (int) if using the XML parser. + Tuple of number of identities/total aligned (int, int) + if using the (obsolete) plain text parser. + - positives Number of positives (int) if using the XML parser. + Tuple of number of positives/total aligned (int, int) + if using the (obsolete) plain text parser. + - gaps Number of gaps (int) if using the XML parser. + Tuple of number of gaps/total aligned (int, int) if + using the (obsolete) plain text parser. + - align_length Length of the alignment. (int) + - strand Tuple of (query, target) strand. + - frame Tuple of 1 or 2 frame shifts, depending on the flavor. + + - query The query sequence. + - query_start The start residue for the query sequence. (1-based) + - query_end The end residue for the query sequence. (1-based) + - match The match sequence. + - sbjct The sbjct sequence. + - sbjct_start The start residue for the sbjct sequence. (1-based) + - sbjct_end The end residue for the sbjct sequence. (1-based) + + Not all flavors of BLAST return values for every attribute:: + + score expect identities positives strand frame + BLASTP X X X X + BLASTN X X X X X + BLASTX X X X X X + TBLASTN X X X X X + TBLASTX X X X X X/X + + Note: for BLASTX, the query sequence is shown as a protein sequence, + but the numbering is based on the nucleotides. Thus, the numbering + is 3x larger than the number of amino acid residues. A similar effect + can be seen for the sbjct sequence in TBLASTN, and for both sequences + in TBLASTX. 
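+
+    For example, an ungapped BLASTX HSP of 30 aligned residues reported
+    as starting at query position 10 spans 90 nucleotides, ending at
+    query position 99.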
+ + Also, for negative frames, the sequence numbering starts from + query_start and counts down. + + """ + + def __init__(self): + """Initialize the class.""" + self.score = None + self.bits = None + self.expect = None + self.num_alignments = None + self.identities = (None, None) + self.positives = (None, None) + self.gaps = (None, None) + self.align_length = None + self.strand = (None, None) + self.frame = () + + self.query = "" + self.query_start = None + self.query_end = None + self.match = "" + self.sbjct = "" + self.sbjct_start = None + self.sbjct_end = None + + def __str__(self): + """Return the BLAST HSP as a formatted string.""" + lines = [ + "Score %s (%s bits), expectation %s, alignment length %s" + % ( + fmt_(self.score, "%i"), + fmt_(self.bits, "%i"), + fmt_(self.expect, "%0.1e"), + fmt_(self.align_length, "%i"), + ) + ] + if self.align_length is None: + return "\n".join(lines) + if self.align_length < 50: + lines.append( + "Query:%8s %s %s" % (self.query_start, self.query, self.query_end) + ) + lines.append(" %s" % self.match) + lines.append( + "Sbjct:%8s %s %s" % (self.sbjct_start, self.sbjct, self.sbjct_end) + ) + else: + lines.append( + "Query:%8s %s...%s %s" + % (self.query_start, self.query[:45], self.query[-3:], self.query_end,) + ) + lines.append(" %s...%s" % (self.match[:45], self.match[-3:])) + lines.append( + "Sbjct:%8s %s...%s %s" + % (self.sbjct_start, self.sbjct[:45], self.sbjct[-3:], self.sbjct_end) + ) + return "\n".join(lines) + + +class MultipleAlignment: + """Holds information about a multiple alignment. + + Members: + alignment A list of tuples (name, start residue, sequence, end residue). + + The start residue is 1-based. It may be blank, if that sequence is + not aligned in the multiple alignment. + + """ + + def __init__(self): + """Initialize the class.""" + self.alignment = [] + + def to_generic(self): + """Retrieve generic alignment object for the given alignment. + + Instead of the tuples, this returns a MultipleSeqAlignment object + from Bio.Align, through which you can manipulate and query + the object. + + Thanks to James Casbon for the code. + """ + seq_parts = [] + seq_names = [] + parse_number = 0 + n = 0 + for name, start, seq, end in self.alignment: + if name == "QUERY": # QUERY is the first in each alignment block + parse_number += 1 + n = 0 + + if parse_number == 1: # create on first_parse, append on all others + seq_parts.append(seq) + seq_names.append(name) + else: + seq_parts[n] += seq + n += 1 + + records = ( + SeqRecord(Seq(seq), name) for (name, seq) in zip(seq_names, seq_parts) + ) + return MultipleSeqAlignment(records) + + +class Round: + """Holds information from a PSI-BLAST round. + + Members: + number Round number. (int) + reused_seqs Sequences in model, found again. List of Description objects. + new_seqs Sequences not found, or below threshold. List of Description. + alignments A list of Alignment objects. + multiple_alignment A MultipleAlignment object. + """ + + def __init__(self): + """Initialize the class.""" + self.number = None + self.reused_seqs = [] + self.new_seqs = [] + self.alignments = [] + self.multiple_alignment = None + + +class DatabaseReport: + """Holds information about a database report. + + Members: + database_name List of database names. (can have multiple dbs) + num_letters_in_database Number of letters in the database. (int) + num_sequences_in_database List of number of sequences in the database. + posted_date List of the dates the databases were posted. + ka_params A tuple of (lambda, k, h) values. 
(floats) + gapped # XXX this isn't set right! + ka_params_gap A tuple of (lambda, k, h) values. (floats) + + """ + + def __init__(self): + """Initialize the class.""" + self.database_name = [] + self.posted_date = [] + self.num_letters_in_database = [] + self.num_sequences_in_database = [] + self.ka_params = (None, None, None) + self.gapped = 0 + self.ka_params_gap = (None, None, None) + + +class Parameters: + """Holds information about the parameters. + + Members: + matrix Name of the matrix. + gap_penalties Tuple of (open, extend) penalties. (floats) + sc_match Match score for nucleotide-nucleotide comparison + sc_mismatch Mismatch penalty for nucleotide-nucleotide comparison + num_hits Number of hits to the database. (int) + num_sequences Number of sequences. (int) + num_good_extends Number of extensions. (int) + num_seqs_better_e Number of sequences better than e-value. (int) + hsps_no_gap Number of HSP's better, without gapping. (int) + hsps_prelim_gapped Number of HSP's gapped in prelim test. (int) + hsps_prelim_gapped_attemped Number of HSP's attempted in prelim. (int) + hsps_gapped Total number of HSP's gapped. (int) + query_length Length of the query. (int) + query_id Identifier of the query sequence. (str) + database_length Number of letters in the database. (int) + effective_hsp_length Effective HSP length. (int) + effective_query_length Effective length of query. (int) + effective_database_length Effective length of database. (int) + effective_search_space Effective search space. (int) + effective_search_space_used Effective search space used. (int) + frameshift Frameshift window. Tuple of (int, float) + threshold Threshold. (int) + window_size Window size. (int) + dropoff_1st_pass Tuple of (score, bits). (int, float) + gap_x_dropoff Tuple of (score, bits). (int, float) + gap_x_dropoff_final Tuple of (score, bits). (int, float) + gap_trigger Tuple of (score, bits). (int, float) + blast_cutoff Tuple of (score, bits). (int, float) + """ + + def __init__(self): + """Initialize the class.""" + self.matrix = "" + self.gap_penalties = (None, None) + self.sc_match = None + self.sc_mismatch = None + self.num_hits = None + self.num_sequences = None + self.num_good_extends = None + self.num_seqs_better_e = None + self.hsps_no_gap = None + self.hsps_prelim_gapped = None + self.hsps_prelim_gapped_attemped = None + self.hsps_gapped = None + self.query_id = None + self.query_length = None + self.database_length = None + self.effective_hsp_length = None + self.effective_query_length = None + self.effective_database_length = None + self.effective_search_space = None + self.effective_search_space_used = None + self.frameshift = (None, None) + self.threshold = None + self.window_size = None + self.dropoff_1st_pass = (None, None) + self.gap_x_dropoff = (None, None) + self.gap_x_dropoff_final = (None, None) + self.gap_trigger = (None, None) + self.blast_cutoff = (None, None) + + +# TODO - Add a friendly __str__ method to BLAST results +class Blast(Header, DatabaseReport, Parameters): + """Saves the results from a blast search. + + Members: + descriptions A list of Description objects. + alignments A list of Alignment objects. + multiple_alignment A MultipleAlignment object. 
+
+    members inherited from base classes
+
+    """
+
+    def __init__(self):
+        """Initialize the class."""
+        Header.__init__(self)
+        DatabaseReport.__init__(self)
+        Parameters.__init__(self)
+        self.descriptions = []
+        self.alignments = []
+        self.multiple_alignment = None
+
+
+class PSIBlast(Header, DatabaseReport, Parameters):
+    """Saves the results from a blastpgp search.
+
+    Members:
+    rounds       A list of Round objects.
+    converged    Whether the search converged.
+
+    members inherited from base classes
+
+    """
+
+    def __init__(self):
+        """Initialize the class."""
+        Header.__init__(self)
+        DatabaseReport.__init__(self)
+        Parameters.__init__(self)
+        self.rounds = []
+        self.converged = 0
diff --git a/code/lib/Bio/Blast/__init__.py b/code/lib/Bio/Blast/__init__.py
new file mode 100644
index 0000000..27c0ec3
--- /dev/null
+++ b/code/lib/Bio/Blast/__init__.py
@@ -0,0 +1,7 @@
+# Copyright 1999 by Jeffrey Chang.  All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Code for dealing with BLAST programs and output."""
diff --git a/code/lib/Bio/Blast/__pycache__/Applications.cpython-37.pyc b/code/lib/Bio/Blast/__pycache__/Applications.cpython-37.pyc
new file mode 100644
index 0000000..44e9314
Binary files /dev/null and b/code/lib/Bio/Blast/__pycache__/Applications.cpython-37.pyc differ
diff --git a/code/lib/Bio/Blast/__pycache__/NCBIWWW.cpython-37.pyc b/code/lib/Bio/Blast/__pycache__/NCBIWWW.cpython-37.pyc
new file mode 100644
index 0000000..f6823c0
Binary files /dev/null and b/code/lib/Bio/Blast/__pycache__/NCBIWWW.cpython-37.pyc differ
diff --git a/code/lib/Bio/Blast/__pycache__/NCBIXML.cpython-37.pyc b/code/lib/Bio/Blast/__pycache__/NCBIXML.cpython-37.pyc
new file mode 100644
index 0000000..18c8311
Binary files /dev/null and b/code/lib/Bio/Blast/__pycache__/NCBIXML.cpython-37.pyc differ
diff --git a/code/lib/Bio/Blast/__pycache__/ParseBlastTable.cpython-37.pyc b/code/lib/Bio/Blast/__pycache__/ParseBlastTable.cpython-37.pyc
new file mode 100644
index 0000000..e31274f
Binary files /dev/null and b/code/lib/Bio/Blast/__pycache__/ParseBlastTable.cpython-37.pyc differ
diff --git a/code/lib/Bio/Blast/__pycache__/Record.cpython-37.pyc b/code/lib/Bio/Blast/__pycache__/Record.cpython-37.pyc
new file mode 100644
index 0000000..37eead1
Binary files /dev/null and b/code/lib/Bio/Blast/__pycache__/Record.cpython-37.pyc differ
diff --git a/code/lib/Bio/Blast/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/Blast/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000..9e31daf
Binary files /dev/null and b/code/lib/Bio/Blast/__pycache__/__init__.cpython-37.pyc differ
diff --git a/code/lib/Bio/CAPS/__init__.py b/code/lib/Bio/CAPS/__init__.py
new file mode 100644
index 0000000..2c28e8f
--- /dev/null
+++ b/code/lib/Bio/CAPS/__init__.py
@@ -0,0 +1,135 @@
+# Copyright 2005 by Jonathan Taylor.
+# All rights reserved.
+# This code is part of the Biopython distribution and governed by its
+# license.  Please see the LICENSE file that should have been included
+# as part of this package.
+"""Cleaved amplified polymorphic sequence (CAPS) markers.
+
+A CAPS marker is the location of a DifferentialCutsite, as described below,
+together with a set of primers that can be used to visualize it.  More
+information can be found in the paper `Konieczny and Ausubel (1993)`_
+(PMID 8106085).
+
+..
_`Konieczny and Ausubel (1993)`: https://doi.org/10.1046/j.1365-313X.1993.04020403.x + +""" + + +class DifferentialCutsite: + """Differential enzyme cutsite in an alignment. + + A differential cutsite is a location in an alignment where an enzyme cuts + at least one sequence and also cannot cut at least one other sequence. + + Members: + - start - Where it lives in the alignment. + - enzyme - The enzyme that causes this. + - cuts_in - A list of sequences (as indexes into the alignment) the + enzyme cuts in. + - blocked_in - A list of sequences (as indexes into the alignment) the + enzyme is blocked in. + + """ + + def __init__(self, **kwds): + """Initialize a DifferentialCutsite. + + Each member (as listed in the class description) should be included as a + keyword. + """ + self.start = int(kwds["start"]) + self.enzyme = kwds["enzyme"] + self.cuts_in = kwds["cuts_in"] + self.blocked_in = kwds["blocked_in"] + + +class AlignmentHasDifferentLengthsError(Exception): + """Exception where sequences in alignment have different lengths.""" + + pass + + +class CAPSMap: + """A map of an alignment showing all possible dcuts. + + Members: + - alignment - The alignment that is mapped. + - dcuts - A list of possible CAPS markers in the form of + DifferentialCutsites. + + """ + + def __init__(self, alignment, enzymes=None): + """Initialize the CAPSMap. + + Required: + - alignment - The alignment to be mapped. + + Optional: + - enzymes - List of enzymes to be used to create the map. + Defaults to an empty list. + + """ + if enzymes is None: + enzymes = [] + self.sequences = [rec.seq for rec in alignment] + self.size = len(self.sequences) + self.length = len(self.sequences[0]) + for seq in self.sequences: + if len(seq) != self.length: + raise AlignmentHasDifferentLengthsError + + self.alignment = alignment + self.enzymes = enzymes + + # look for dcuts + self._digest() + + def _digest_with(self, enzyme): + cuts = [] # list of lists, one per sequence + all = [] + + # go through each sequence + for seq in self.sequences: + # grab all the cuts in the sequence + seq_cuts = [cut - enzyme.fst5 for cut in enzyme.search(seq)] + # maintain a list of all cuts in all sequences + all.extend(seq_cuts) + cuts.append(seq_cuts) + + # we sort the all list and remove duplicates + all.sort() + + last = -999 + new = [] + for cut in all: + if cut != last: + new.append(cut) + last = cut + all = new + # all now has indices for all sequences in the alignment + + for cut in all: + # test for dcuts + + cuts_in = [] + blocked_in = [] + + for i in range(0, self.size): + seq = self.sequences[i] + if cut in cuts[i]: + cuts_in.append(i) + else: + blocked_in.append(i) + + if cuts_in != [] and blocked_in != []: + self.dcuts.append( + DifferentialCutsite( + start=cut, enzyme=enzyme, cuts_in=cuts_in, blocked_in=blocked_in + ) + ) + + def _digest(self): + self.dcuts = [] + + for enzyme in self.enzymes: + self._digest_with(enzyme) diff --git a/code/lib/Bio/CAPS/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/CAPS/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..06df99c Binary files /dev/null and b/code/lib/Bio/CAPS/__pycache__/__init__.cpython-37.pyc differ diff --git a/code/lib/Bio/Cluster/__init__.py b/code/lib/Bio/Cluster/__init__.py new file mode 100644 index 0000000..32444da --- /dev/null +++ b/code/lib/Bio/Cluster/__init__.py @@ -0,0 +1,1293 @@ +# This code is part of the Biopython distribution and governed by its +# license. Please see the LICENSE file that should have been included +# as part of this package. 
+# +"""Cluster Analysis. + +The Bio.Cluster provides commonly used clustering algorithms and was +designed with the application to gene expression data in mind. However, +this module can also be used for cluster analysis of other types of data. + +Bio.Cluster and the underlying C Clustering Library is described in +M. de Hoon et al. (2004) https://doi.org/10.1093/bioinformatics/bth078 +""" + +import numbers + +try: + import numpy +except ImportError: + from Bio import MissingPythonDependencyError + + raise MissingPythonDependencyError( + "Please install numpy if you want to use Bio.Cluster. " + "See http://www.numpy.org/" + ) from None + +from . import _cluster + +__all__ = ( + "Node", + "Tree", + "kcluster", + "kmedoids", + "treecluster", + "somcluster", + "clusterdistance", + "clustercentroids", + "distancematrix", + "pca", + "Record", + "read", +) + + +__version__ = _cluster.version() + + +class Node(_cluster.Node): + """Element of a hierarchical clustering tree. + + A node contains items or other Nodes(sub-nodes). + """ + + __doc__ = _cluster.Node.__doc__ + + +class Tree(_cluster.Tree): + """Hierarchical clustering tree. + + A Tree consists of Nodes. + """ + + def sort(self, order=None): + """Sort the hierarchical clustering tree. + + Sort the hierarchical clustering tree by switching the left and + right subnode of nodes such that the elements in the left-to-right + order of the tree tend to have increasing order values. + + Return the indices of the elements in the left-to-right order in + the hierarchical clustering tree, such that the element with index + indices[i] occurs at position i in the dendrogram. + + """ + n = len(self) + 1 + indices = numpy.ones(n, dtype="intc") + if order is None: + order = numpy.ones(n, dtype="d") + elif isinstance(order, numpy.ndarray): + order = numpy.require(order, dtype="d", requirements="C") + else: + order = numpy.array(order, dtype="d") + _cluster.Tree.sort(self, indices, order) + return indices + + def cut(self, nclusters=None): + """Create clusters by cutting the hierarchical clustering tree. + + Divide the elements in a hierarchical clustering result mytree + into clusters, and return an array with the number of the cluster + to which each element was assigned. + + Keyword arguments: + - nclusters: The desired number of clusters. + """ + n = len(self) + 1 + indices = numpy.ones(n, dtype="intc") + if nclusters is None: + nclusters = n + _cluster.Tree.cut(self, indices, nclusters) + return indices + + +def kcluster( + data, + nclusters=2, + mask=None, + weight=None, + transpose=False, + npass=1, + method="a", + dist="e", + initialid=None, +): + """Perform k-means clustering. + + This function performs k-means clustering on the values in data, and + returns the cluster assignments, the within-cluster sum of distances + of the optimal k-means clustering solution, and the number of times + the optimal solution was found. + + Keyword arguments: + - data: nrows x ncolumns array containing the data values. + - nclusters: number of clusters (the 'k' in k-means). + - mask: nrows x ncolumns array of integers, showing which data + are missing. If mask[i,j]==0, then data[i,j] is missing. + - weight: the weights to be used when calculating distances + - transpose: + - if False: rows are clustered; + - if True: columns are clustered. + - npass: number of times the k-means clustering algorithm is + performed, each time with a different (random) initial + condition. 
+ - method: specifies how the center of a cluster is found: + - method == 'a': arithmetic mean; + - method == 'm': median. + - dist: specifies the distance function to be used: + - dist == 'e': Euclidean distance; + - dist == 'b': City Block distance; + - dist == 'c': Pearson correlation; + - dist == 'a': absolute value of the correlation; + - dist == 'u': uncentered correlation; + - dist == 'x': absolute uncentered correlation; + - dist == 's': Spearman's rank correlation; + - dist == 'k': Kendall's tau. + - initialid: the initial clustering from which the algorithm + should start. + If initialid is None, the routine carries out npass + repetitions of the EM algorithm, each time starting from a + different random initial clustering. If initialid is given, + the routine carries out the EM algorithm only once, starting + from the given initial clustering and without randomizing the + order in which items are assigned to clusters (i.e., using + the same order as in the data matrix). In that case, the + k-means algorithm is fully deterministic. + + Return values: + - clusterid: array containing the number of the cluster to which each + item was assigned in the best k-means clustering solution that was + found in the npass runs; + - error: the within-cluster sum of distances for the returned k-means + clustering solution; + - nfound: the number of times this solution was found. + """ + data = __check_data(data) + shape = data.shape + if transpose: + ndata, nitems = shape + else: + nitems, ndata = shape + mask = __check_mask(mask, shape) + weight = __check_weight(weight, ndata) + clusterid, npass = __check_initialid(initialid, npass, nitems) + error, nfound = _cluster.kcluster( + data, nclusters, mask, weight, transpose, npass, method, dist, clusterid + ) + return clusterid, error, nfound + + +def kmedoids(distance, nclusters=2, npass=1, initialid=None): + """Perform k-medoids clustering. + + This function performs k-medoids clustering, and returns the cluster + assignments, the within-cluster sum of distances of the optimal + k-medoids clustering solution, and the number of times the optimal + solution was found. + + Keyword arguments: + - distance: The distance matrix between the items. There are three + ways in which you can pass a distance matrix: + 1. a 2D Numerical Python array (in which only the left-lower + part of the array will be accessed); + 2. a 1D Numerical Python array containing the distances + consecutively; + 3. a list of rows containing the lower-triangular part of + the distance matrix. + + Examples are: + + >>> from numpy import array + >>> # option 1: + >>> distance = array([[0.0, 1.1, 2.3], + ... [1.1, 0.0, 4.5], + ... [2.3, 4.5, 0.0]]) + >>> # option 2: + >>> distance = array([1.1, 2.3, 4.5]) + >>> # option 3: + >>> distance = [array([]), + ... array([1.1]), + ... array([2.3, 4.5])] + + + These three correspond to the same distance matrix. + - nclusters: number of clusters (the 'k' in k-medoids) + - npass: the number of times the k-medoids clustering algorithm + is performed, each time with a different (random) initial + condition. + - initialid: the initial clustering from which the algorithm should start. + If initialid is not given, the routine carries out npass + repetitions of the EM algorithm, each time starting from a + different random initial clustering. 
 If initialid is given,
+       the routine carries out the EM algorithm only once, starting
+       from the initial clustering specified by initialid and
+       without randomizing the order in which items are assigned to
+       clusters (i.e., using the same order as in the data matrix).
+       In that case, the k-medoids algorithm is fully deterministic.
+
+    Return values:
+     - clusterid: array containing the number of the cluster to which each
+       item was assigned in the best k-medoids clustering solution that was
+       found in the npass runs;
+     - error: the within-cluster sum of distances for the returned k-medoids
+       clustering solution;
+     - nfound: the number of times this solution was found.
+    """
+    distance = __check_distancematrix(distance)
+    nitems = len(distance)
+    clusterid, npass = __check_initialid(initialid, npass, nitems)
+    error, nfound = _cluster.kmedoids(distance, nclusters, npass, clusterid)
+    return clusterid, error, nfound
+
+
+def treecluster(
+    data,
+    mask=None,
+    weight=None,
+    transpose=False,
+    method="m",
+    dist="e",
+    distancematrix=None,
+):
+    """Perform hierarchical clustering, and return a Tree object.
+
+    This function implements the pairwise single, complete, centroid, and
+    average linkage hierarchical clustering methods.
+
+    Keyword arguments:
+     - data: nrows x ncolumns array containing the data values.
+     - mask: nrows x ncolumns array of integers, showing which data are
+       missing. If mask[i][j]==0, then data[i][j] is missing.
+     - weight: the weights to be used when calculating distances.
+     - transpose:
+       - if False, rows are clustered;
+       - if True, columns are clustered.
+     - dist: specifies the distance function to be used:
+       - dist == 'e': Euclidean distance
+       - dist == 'b': City Block distance
+       - dist == 'c': Pearson correlation
+       - dist == 'a': absolute value of the correlation
+       - dist == 'u': uncentered correlation
+       - dist == 'x': absolute uncentered correlation
+       - dist == 's': Spearman's rank correlation
+       - dist == 'k': Kendall's tau
+     - method: specifies which linkage method is used:
+       - method == 's': Single pairwise linkage
+       - method == 'm': Complete (maximum) pairwise linkage (default)
+       - method == 'c': Centroid linkage
+       - method == 'a': Average pairwise linkage
+     - distancematrix: The distance matrix between the items. There are
+       three ways in which you can pass a distance matrix:
+       1. a 2D Numerical Python array (in which only the left-lower
+          part of the array will be accessed);
+       2. a 1D Numerical Python array containing the distances
+          consecutively;
+       3. a list of rows containing the lower-triangular part of
+          the distance matrix.
+
+       Examples are:
+
+       >>> from numpy import array
+       >>> # option 1:
+       >>> distance = array([[0.0, 1.1, 2.3],
+       ...                   [1.1, 0.0, 4.5],
+       ...                   [2.3, 4.5, 0.0]])
+       >>> # option 2:
+       >>> distance = array([1.1, 2.3, 4.5])
+       >>> # option 3:
+       >>> distance = [array([]),
+       ...             array([1.1]),
+       ...             array([2.3, 4.5])]
+
+       These three correspond to the same distance matrix.
+
+       PLEASE NOTE:
+       As the treecluster routine may shuffle the values in the
+       distance matrix as part of the clustering algorithm, be sure
+       to save this array in a different variable before calling
+       treecluster if you need it later.
+
+    Either data or distancematrix should be None. If distancematrix is None,
+    the hierarchical clustering solution is calculated from the values stored
+    in the argument data. If data is None, the hierarchical clustering solution
+    is instead calculated from the distance matrix.
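+
+    For example, the following minimal sketch (illustrative only, reusing the
+    small distance matrix from the options above) clusters three items
+    directly from a distance matrix:
+
+    >>> from numpy import array
+    >>> from Bio.Cluster import treecluster
+    >>> distance = array([[0.0, 1.1, 2.3],
+    ...                   [1.1, 0.0, 4.5],
+    ...                   [2.3, 4.5, 0.0]])
+    >>> tree = treecluster(data=None, distancematrix=distance)
+    >>> len(tree)  # n items are joined by n - 1 nodes
+    2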
+
+    Pairwise centroid-linkage clustering can be performed only from the data
+    values and not from the distance matrix. Pairwise single-, maximum-, and
+    average-linkage clustering can be calculated from the data values or from
+    the distance matrix.
+
+    Return value:
+    treecluster returns a Tree object describing the hierarchical clustering
+    result. See the description of the Tree class for more information.
+    """
+    if data is None and distancematrix is None:
+        raise ValueError("use either data or distancematrix")
+    if data is not None and distancematrix is not None:
+        raise ValueError("use either data or distancematrix; do not use both")
+    if data is not None:
+        data = __check_data(data)
+        shape = data.shape
+        ndata = shape[0] if transpose else shape[1]
+        mask = __check_mask(mask, shape)
+        weight = __check_weight(weight, ndata)
+    if distancematrix is not None:
+        distancematrix = __check_distancematrix(distancematrix)
+        if mask is not None:
+            raise ValueError("mask is ignored if distancematrix is used")
+        if weight is not None:
+            raise ValueError("weight is ignored if distancematrix is used")
+    tree = Tree()
+    _cluster.treecluster(
+        tree, data, mask, weight, transpose, method, dist, distancematrix
+    )
+    return tree
+
+
+def somcluster(
+    data,
+    mask=None,
+    weight=None,
+    transpose=False,
+    nxgrid=2,
+    nygrid=1,
+    inittau=0.02,
+    niter=1,
+    dist="e",
+):
+    """Calculate a Self-Organizing Map.
+
+    This function implements a Self-Organizing Map on a rectangular grid.
+
+    Keyword arguments:
+     - data: nrows x ncolumns array containing the data values;
+     - mask: nrows x ncolumns array of integers, showing which data are
+       missing. If mask[i][j]==0, then data[i][j] is missing.
+     - weight: the weights to be used when calculating distances
+     - transpose:
+       - if False: rows are clustered;
+       - if True: columns are clustered.
+     - nxgrid: the horizontal dimension of the rectangular SOM map
+     - nygrid: the vertical dimension of the rectangular SOM map
+     - inittau: the initial value of tau (the neighborhood function)
+     - niter: the number of iterations
+     - dist: specifies the distance function to be used:
+       - dist == 'e': Euclidean distance
+       - dist == 'b': City Block distance
+       - dist == 'c': Pearson correlation
+       - dist == 'a': absolute value of the correlation
+       - dist == 'u': uncentered correlation
+       - dist == 'x': absolute uncentered correlation
+       - dist == 's': Spearman's rank correlation
+       - dist == 'k': Kendall's tau
+
+    Return values:
+
+     - clusterid: array with two columns, with the number of rows equal to
+       the number of items that are being clustered. Each row in the array
+       contains the x and y coordinates of the cell in the rectangular SOM
+       grid to which the item was assigned.
+     - celldata: an array with dimensions [nxgrid, nygrid, number of columns]
+       if rows are being clustered, or [nxgrid, nygrid, number of rows] if
+       columns are being clustered.
+       Each element [ix, iy] of this array is a 1D vector containing the
+       data values for the centroid of the cluster in the SOM grid cell
+       with coordinates [ix, iy].
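+
+    A minimal usage sketch (illustrative only; the map itself depends on a
+    random initialization, so only the output shapes are deterministic):
+
+    >>> from numpy import array
+    >>> from Bio.Cluster import somcluster
+    >>> data = array([[0.0, 1.0], [1.2, 0.9], [8.0, 7.5], [7.7, 8.1]])
+    >>> clusterid, celldata = somcluster(data, nxgrid=2, nygrid=1, niter=5)
+    >>> clusterid.shape  # one (x, y) grid cell per item
+    (4, 2)
+    >>> celldata.shape  # (nxgrid, nygrid, ncolumns)
+    (2, 1, 2)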
+ """ + if transpose: + ndata, nitems = data.shape + else: + nitems, ndata = data.shape + data = __check_data(data) + shape = data.shape + mask = __check_mask(mask, shape) + weight = __check_weight(weight, ndata) + if nxgrid < 1: + raise ValueError("nxgrid should be a positive integer (default is 2)") + if nygrid < 1: + raise ValueError("nygrid should be a positive integer (default is 1)") + clusterids = numpy.ones((nitems, 2), dtype="intc") + celldata = numpy.empty((nxgrid, nygrid, ndata), dtype="d") + _cluster.somcluster( + clusterids, celldata, data, mask, weight, transpose, inittau, niter, dist + ) + return clusterids, celldata + + +def clusterdistance( + data, + mask=None, + weight=None, + index1=None, + index2=None, + method="a", + dist="e", + transpose=False, +): + """Calculate and return the distance between two clusters. + + Keyword arguments: + - data: nrows x ncolumns array containing the data values. + - mask: nrows x ncolumns array of integers, showing which data are + missing. If mask[i, j]==0, then data[i, j] is missing. + - weight: the weights to be used when calculating distances + - index1: 1D array identifying which items belong to the + first cluster. If the cluster contains only one item, then + index1 can also be written as a single integer. + - index2: 1D array identifying which items belong to the + second cluster. If the cluster contains only one item, then + index2 can also be written as a single integer. + - dist: specifies the distance function to be used: + - dist == 'e': Euclidean distance + - dist == 'b': City Block distance + - dist == 'c': Pearson correlation + - dist == 'a': absolute value of the correlation + - dist == 'u': uncentered correlation + - dist == 'x': absolute uncentered correlation + - dist == 's': Spearman's rank correlation + - dist == 'k': Kendall's tau + - method: specifies how the distance between two clusters is defined: + - method == 'a': the distance between the arithmetic means + of the two clusters + - method == 'm': the distance between the medians of the two clusters + - method == 's': the smallest pairwise distance between members + of the two clusters + - method == 'x': the largest pairwise distance between members + of the two clusters + - method == 'v': average of the pairwise distances between members + of the two clusters + - transpose: + - if False: clusters of rows are considered; + - if True: clusters of columns are considered. + """ + data = __check_data(data) + shape = data.shape + ndata = shape[0] if transpose else shape[1] + mask = __check_mask(mask, shape) + weight = __check_weight(weight, ndata) + index1 = __check_index(index1) + index2 = __check_index(index2) + return _cluster.clusterdistance( + data, mask, weight, index1, index2, method, dist, transpose + ) + + +def clustercentroids(data, mask=None, clusterid=None, method="a", transpose=False): + """Calculate and return the centroid of each cluster. + + The clustercentroids routine calculates the cluster centroids, given to + which cluster each item belongs. The centroid is defined as either + the mean or the median over all items for each dimension. + + Keyword arguments: + - data: nrows x ncolumns array containing the data values. + - mask: nrows x ncolumns array of integers, showing which data are + missing. If mask[i, j]==0, then data[i, j] is missing. + - clusterid: array containing the cluster number for each item. + The cluster number should be non-negative. 
+     - method: specifies whether the centroid is calculated from the
+       arithmetic mean (method == 'a', default) or the median (method == 'm')
+       over each dimension.
+     - transpose: if False, each row contains the data for one item;
+       if True, each column contains the data for one item.
+
+    Return values:
+     - cdata: 2D array containing the cluster centroids.
+       If transpose is False, then the dimensions of cdata are
+       nclusters x ncolumns.
+       If transpose is True, then the dimensions of cdata are
+       nrows x nclusters.
+     - cmask: 2D array of integers describing which items in cdata,
+       if any, are missing.
+    """
+    data = __check_data(data)
+    mask = __check_mask(mask, data.shape)
+    nrows, ncolumns = data.shape
+    if clusterid is None:
+        n = ncolumns if transpose else nrows
+        clusterid = numpy.zeros(n, dtype="intc")
+        nclusters = 1
+    else:
+        clusterid = numpy.require(clusterid, dtype="intc", requirements="C")
+        nclusters = max(clusterid + 1)
+    if transpose:
+        shape = (nrows, nclusters)
+    else:
+        shape = (nclusters, ncolumns)
+    cdata = numpy.zeros(shape, dtype="d")
+    cmask = numpy.zeros(shape, dtype="intc")
+    _cluster.clustercentroids(data, mask, clusterid, method, transpose, cdata, cmask)
+    return cdata, cmask
+
+
+def distancematrix(data, mask=None, weight=None, transpose=False, dist="e"):
+    """Calculate and return a distance matrix from the data.
+
+    This function returns the distance matrix calculated from the data.
+
+    Keyword arguments:
+     - data: nrows x ncolumns array containing the data values.
+     - mask: nrows x ncolumns array of integers, showing which data are
+       missing. If mask[i, j]==0, then data[i, j] is missing.
+     - weight: the weights to be used when calculating distances.
+     - transpose: if False: the distances between rows are calculated;
+       if True: the distances between columns are calculated.
+     - dist: specifies the distance function to be used:
+       - dist == 'e': Euclidean distance
+       - dist == 'b': City Block distance
+       - dist == 'c': Pearson correlation
+       - dist == 'a': absolute value of the correlation
+       - dist == 'u': uncentered correlation
+       - dist == 'x': absolute uncentered correlation
+       - dist == 's': Spearman's rank correlation
+       - dist == 'k': Kendall's tau
+
+    Return value:
+    The distance matrix is returned as a list of 1D arrays containing the
+    distance matrix calculated from the data. The number of columns in each
+    row is equal to the row number. Hence, the first row has zero length.
+    For example:
+
+    >>> from numpy import array
+    >>> from Bio.Cluster import distancematrix
+    >>> data = array([[0, 1, 2, 3],
+    ...               [4, 5, 6, 7],
+    ...               [8, 9, 10, 11],
+    ...               [1, 2, 3, 4]])
+    >>> distances = distancematrix(data, dist='e')
+    >>> distances
+    [array([], dtype=float64), array([ 16.]), array([ 64., 16.]), array([ 1., 9., 49.])]
+
+    which can be rewritten as
+       distances = [array([], dtype=float64),
+                    array([ 16.]),
+                    array([ 64., 16.]),
+                    array([ 1., 9., 49.])]
+
+    This corresponds to the distance matrix:
+
+        [ 0., 16., 64.,  1.]
+        [16.,  0., 16.,  9.]
+        [64., 16.,  0., 49.]
+        [ 1.,  9., 49.,  0.]
+    """
+    data = __check_data(data)
+    shape = data.shape
+    mask = __check_mask(mask, shape)
+    if transpose:
+        ndata, nitems = shape
+    else:
+        nitems, ndata = shape
+    weight = __check_weight(weight, ndata)
+    matrix = [numpy.empty(i, dtype="d") for i in range(nitems)]
+    _cluster.distancematrix(data, mask, weight, transpose, dist, matrix)
+    return matrix
+
+
+def pca(data):
+    """Perform principal component analysis.
+
+    Keyword arguments:
+     - data: nrows x ncolumns array containing the data values.
+
+    Return value:
+    This function returns an array containing the mean of each column, the
+    principal components as an nmin x ncolumns array, as well as the
+    coordinates (an nrows x nmin array) of the data along the principal
+    components, and the associated eigenvalues. The principal components, the
+    coordinates, and the eigenvalues are sorted by the magnitude of the
+    eigenvalue, with the largest eigenvalues appearing first. Here, nmin is
+    the smaller of nrows and ncolumns.
+    Adding the column means to the dot product of the coordinates and the
+    principal components recreates the data matrix:
+
+    >>> from numpy import array, dot, amax, amin
+    >>> from Bio.Cluster import pca
+    >>> matrix = array([[ 0.,  0.,  0.],
+    ...                 [ 1.,  0.,  0.],
+    ...                 [ 7.,  3.,  0.],
+    ...                 [ 4.,  2.,  6.]])
+    >>> columnmean, coordinates, pc, _ = pca(matrix)
+    >>> m = matrix - (columnmean + dot(coordinates, pc))
+    >>> amax(m) < 1e-12 and amin(m) > -1e-12
+    True
+
+    """
+    data = __check_data(data)
+    nrows, ncols = data.shape
+    nmin = min(nrows, ncols)
+    columnmean = numpy.empty(ncols, dtype="d")
+    pc = numpy.empty((nmin, ncols), dtype="d")
+    coordinates = numpy.empty((nrows, nmin), dtype="d")
+    eigenvalues = numpy.empty(nmin, dtype="d")
+    _cluster.pca(data, columnmean, coordinates, pc, eigenvalues)
+    return columnmean, coordinates, pc, eigenvalues
+
+
+class Record:
+    """Store gene expression data.
+
+    A Record stores the gene expression data and related information contained
+    in a data file following the file format defined for Michael Eisen's
+    Cluster/TreeView program.
+
+    Attributes:
+     - data: a matrix containing the gene expression data
+     - mask: a matrix containing only 1's and 0's, denoting which values
+       are present (1) or missing (0). If all items of mask are
+       one (no missing data), then mask is set to None.
+     - geneid: a list containing a unique identifier for each gene
+       (e.g., ORF name)
+     - genename: a list containing an additional description for each gene
+       (e.g., gene name)
+     - gweight: the weight to be used for each gene when calculating the
+       distance
+     - gorder: an array of real numbers indicating the preferred order of the
+       genes in the output file
+     - expid: a list containing a unique identifier for each sample.
+     - eweight: the weight to be used for each sample when calculating the
+       distance
+     - eorder: an array of real numbers indicating the preferred order of the
+       samples in the output file
+     - uniqid: the string that was used instead of UNIQID in the input file.
+
+    """
+
+    def __init__(self, handle=None):
+        """Read gene expression data from the file handle and return a Record.
+
+        The file should be in the format defined for Michael Eisen's
+        Cluster/TreeView program.
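+
+        A minimal sketch of the expected layout (hypothetical data, read
+        from an in-memory handle for illustration):
+
+        >>> from io import StringIO
+        >>> handle = StringIO(
+        ...     "YORF\tNAME\tGWEIGHT\tEXP1\tEXP2\n"
+        ...     "GENE1\tgene one\t1.0\t0.5\t1.5\n"
+        ...     "GENE2\tgene two\t1.0\t2.5\t3.5\n"
+        ... )
+        >>> record = Record(handle)
+        >>> record.geneid
+        ['GENE1', 'GENE2']
+        >>> record.data.shape
+        (2, 2)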
+ """ + self.data = None + self.mask = None + self.geneid = None + self.genename = None + self.gweight = None + self.gorder = None + self.expid = None + self.eweight = None + self.eorder = None + self.uniqid = None + if not handle: + return + line = handle.readline().strip("\r\n").split("\t") + n = len(line) + self.uniqid = line[0] + self.expid = [] + cols = {0: "GENEID"} + for word in line[1:]: + if word == "NAME": + cols[line.index(word)] = word + self.genename = [] + elif word == "GWEIGHT": + cols[line.index(word)] = word + self.gweight = [] + elif word == "GORDER": + cols[line.index(word)] = word + self.gorder = [] + else: + self.expid.append(word) + self.geneid = [] + self.data = [] + self.mask = [] + needmask = 0 + for line in handle: + line = line.strip("\r\n").split("\t") + if len(line) != n: + raise ValueError( + "Line with %d columns found (expected %d)" % (len(line), n) + ) + if line[0] == "EWEIGHT": + i = max(cols) + 1 + self.eweight = numpy.array(line[i:], float) + continue + if line[0] == "EORDER": + i = max(cols) + 1 + self.eorder = numpy.array(line[i:], float) + continue + rowdata = [] + rowmask = [] + n = len(line) + for i in range(n): + word = line[i] + if i in cols: + if cols[i] == "GENEID": + self.geneid.append(word) + if cols[i] == "NAME": + self.genename.append(word) + if cols[i] == "GWEIGHT": + self.gweight.append(float(word)) + if cols[i] == "GORDER": + self.gorder.append(float(word)) + continue + if not word: + rowdata.append(0.0) + rowmask.append(0) + needmask = 1 + else: + rowdata.append(float(word)) + rowmask.append(1) + self.data.append(rowdata) + self.mask.append(rowmask) + self.data = numpy.array(self.data) + if needmask: + self.mask = numpy.array(self.mask, int) + else: + self.mask = None + if self.gweight: + self.gweight = numpy.array(self.gweight) + if self.gorder: + self.gorder = numpy.array(self.gorder) + + def treecluster(self, transpose=False, method="m", dist="e"): + """Apply hierarchical clustering and return a Tree object. + + The pairwise single, complete, centroid, and average linkage + hierarchical clustering methods are available. + + Keyword arguments: + - transpose: if False: rows are clustered; + if True: columns are clustered. + - dist: specifies the distance function to be used: + - dist == 'e': Euclidean distance + - dist == 'b': City Block distance + - dist == 'c': Pearson correlation + - dist == 'a': absolute value of the correlation + - dist == 'u': uncentered correlation + - dist == 'x': absolute uncentered correlation + - dist == 's': Spearman's rank correlation + - dist == 'k': Kendall's tau + - method: specifies which linkage method is used: + - method == 's': Single pairwise linkage + - method == 'm': Complete (maximum) pairwise linkage (default) + - method == 'c': Centroid linkage + - method == 'a': Average pairwise linkage + + See the description of the Tree class for more information about + the Tree object returned by this method. + """ + if transpose: + weight = self.gweight + else: + weight = self.eweight + return treecluster(self.data, self.mask, weight, transpose, method, dist) + + def kcluster( + self, + nclusters=2, + transpose=False, + npass=1, + method="a", + dist="e", + initialid=None, + ): + """Apply k-means or k-median clustering. + + This method returns a tuple (clusterid, error, nfound). + + Keyword arguments: + - nclusters: number of clusters (the 'k' in k-means) + - transpose: if False, genes (rows) are clustered; + if True, samples (columns) are clustered. 
+         - npass: number of times the k-means clustering algorithm is
+           performed, each time with a different (random) initial condition.
+         - method: specifies how the center of a cluster is found:
+           - method == 'a': arithmetic mean
+           - method == 'm': median
+         - dist: specifies the distance function to be used:
+           - dist == 'e': Euclidean distance
+           - dist == 'b': City Block distance
+           - dist == 'c': Pearson correlation
+           - dist == 'a': absolute value of the correlation
+           - dist == 'u': uncentered correlation
+           - dist == 'x': absolute uncentered correlation
+           - dist == 's': Spearman's rank correlation
+           - dist == 'k': Kendall's tau
+         - initialid: the initial clustering from which the algorithm should
+           start. If initialid is None, the routine carries out npass
+           repetitions of the EM algorithm, each time starting from a different
+           random initial clustering. If initialid is given, the routine
+           carries out the EM algorithm only once, starting from the given
+           initial clustering and without randomizing the order in which items
+           are assigned to clusters (i.e., using the same order as in the data
+           matrix). In that case, the k-means algorithm is fully deterministic.
+
+        Return values:
+         - clusterid: array containing the number of the cluster to which each
+           gene/sample was assigned in the best k-means clustering
+           solution that was found in the npass runs;
+         - error: the within-cluster sum of distances for the returned
+           k-means clustering solution;
+         - nfound: the number of times this solution was found.
+        """
+        if transpose:
+            weight = self.gweight
+        else:
+            weight = self.eweight
+        return kcluster(
+            self.data,
+            nclusters,
+            self.mask,
+            weight,
+            transpose,
+            npass,
+            method,
+            dist,
+            initialid,
+        )
+
+    def somcluster(
+        self, transpose=False, nxgrid=2, nygrid=1, inittau=0.02, niter=1, dist="e"
+    ):
+        """Calculate a self-organizing map on a rectangular grid.
+
+        The somcluster method returns a tuple (clusterid, celldata).
+
+        Keyword arguments:
+         - transpose: if False, genes (rows) are clustered;
+           if True, samples (columns) are clustered.
+         - nxgrid: the horizontal dimension of the rectangular SOM map
+         - nygrid: the vertical dimension of the rectangular SOM map
+         - inittau: the initial value of tau (the neighborhood function)
+         - niter: the number of iterations
+         - dist: specifies the distance function to be used:
+           - dist == 'e': Euclidean distance
+           - dist == 'b': City Block distance
+           - dist == 'c': Pearson correlation
+           - dist == 'a': absolute value of the correlation
+           - dist == 'u': uncentered correlation
+           - dist == 'x': absolute uncentered correlation
+           - dist == 's': Spearman's rank correlation
+           - dist == 'k': Kendall's tau
+
+        Return values:
+         - clusterid: array with two columns, where the number of rows is
+           equal to the number of genes or the number of samples, depending
+           on whether genes or samples are being clustered. Each row in
+           the array contains the x and y coordinates of the cell in the
+           rectangular SOM grid to which the gene or sample was assigned.
+         - celldata: an array with dimensions (nxgrid, nygrid, number of
+           samples) if genes are being clustered, or (nxgrid, nygrid,
+           number of genes) if samples are being clustered. Each item
+           [ix, iy] of this array is a 1D vector containing the gene
+           expression data for the centroid of the cluster in the SOM grid
+           cell with coordinates [ix, iy].
+        """
+        if transpose:
+            weight = self.gweight
+        else:
+            weight = self.eweight
+        return somcluster(
+            self.data,
+            self.mask,
+            weight,
+            transpose,
+            nxgrid,
+            nygrid,
+            inittau,
+            niter,
+            dist,
+        )
+
+    def clustercentroids(self, clusterid=None, method="a", transpose=False):
+        """Calculate the cluster centroids and return a tuple (cdata, cmask).
+
+        The centroid is defined as either the mean or the median over all
+        items for each dimension. The expression data and missing-data mask
+        are taken from this Record.
+
+        Keyword arguments:
+         - transpose: if False, gene (row) clusters are considered;
+           if True, sample (column) clusters are considered.
+         - clusterid: array containing the cluster number for each gene or
+           sample. The cluster number should be non-negative.
+         - method: specifies how the centroid is calculated:
+           - method == 'a': arithmetic mean over each dimension. (default)
+           - method == 'm': median over each dimension.
+
+        Return values:
+         - cdata: 2D array containing the cluster centroids. If transpose
+           is False, then the dimensions of cdata are nclusters x ncolumns.
+           If transpose is True, then the dimensions of cdata are nrows x
+           nclusters.
+         - cmask: 2D array of integers describing which items in cdata,
+           if any, are missing.
+        """
+        return clustercentroids(self.data, self.mask, clusterid, method, transpose)
+
+    def clusterdistance(
+        self, index1=0, index2=0, method="a", dist="e", transpose=False
+    ):
+        """Calculate the distance between two clusters.
+
+        Keyword arguments:
+         - index1: 1D array identifying which genes/samples belong to the
+           first cluster. If the cluster contains only one gene, then
+           index1 can also be written as a single integer.
+         - index2: 1D array identifying which genes/samples belong to the
+           second cluster. If the cluster contains only one gene, then
+           index2 can also be written as a single integer.
+         - dist: specifies the distance function to be used:
+           - dist == 'e': Euclidean distance
+           - dist == 'b': City Block distance
+           - dist == 'c': Pearson correlation
+           - dist == 'a': absolute value of the correlation
+           - dist == 'u': uncentered correlation
+           - dist == 'x': absolute uncentered correlation
+           - dist == 's': Spearman's rank correlation
+           - dist == 'k': Kendall's tau
+         - method: specifies how the distance between two clusters is defined:
+           - method == 'a': the distance between the arithmetic means
+             of the two clusters
+           - method == 'm': the distance between the medians of the
+             two clusters
+           - method == 's': the smallest pairwise distance between members
+             of the two clusters
+           - method == 'x': the largest pairwise distance between members
+             of the two clusters
+           - method == 'v': average of the pairwise distances between members
+             of the two clusters
+         - transpose: if False, clusters of genes (rows) are considered;
+           if True, clusters of samples (columns) are considered.
+        """
+        if transpose:
+            weight = self.gweight
+        else:
+            weight = self.eweight
+        return clusterdistance(
+            self.data, self.mask, weight, index1, index2, method, dist, transpose
+        )
+
+    def distancematrix(self, transpose=False, dist="e"):
+        """Calculate the distance matrix and return it as a list of arrays.
+
+        Keyword arguments:
+         - transpose:
+           if False: calculate the distances between genes (rows);
+           if True: calculate the distances between samples (columns).
+ - dist: specifies the distance function to be used: + - dist == 'e': Euclidean distance + - dist == 'b': City Block distance + - dist == 'c': Pearson correlation + - dist == 'a': absolute value of the correlation + - dist == 'u': uncentered correlation + - dist == 'x': absolute uncentered correlation + - dist == 's': Spearman's rank correlation + - dist == 'k': Kendall's tau + + Return value: + + The distance matrix is returned as a list of 1D arrays containing the + distance matrix between the gene expression data. The number of columns + in each row is equal to the row number. Hence, the first row has zero + length. An example of the return value is: + + matrix = [[], + array([1.]), + array([7., 3.]), + array([4., 2., 6.])] + + This corresponds to the distance matrix: + + [0., 1., 7., 4.] + [1., 0., 3., 2.] + [7., 3., 0., 6.] + [4., 2., 6., 0.] + + """ + if transpose: + weight = self.gweight + else: + weight = self.eweight + return distancematrix(self.data, self.mask, weight, transpose, dist) + + def save(self, jobname, geneclusters=None, expclusters=None): + """Save the clustering results. + + The saved files follow the convention for the Java TreeView program, + which can therefore be used to view the clustering result. + + Keyword arguments: + - jobname: The base name of the files to be saved. The filenames + are jobname.cdt, jobname.gtr, and jobname.atr for hierarchical + clustering, and jobname-K*.cdt, jobname-K*.kgg, jobname-K*.kag + for k-means clustering results. + - geneclusters: For hierarchical clustering results, geneclusters + is a Tree object as returned by the treecluster method. For k-means + clustering results, geneclusters is a vector containing ngenes + integers, describing to which cluster a given gene belongs. This + vector can be calculated by kcluster. + - expclusters: For hierarchical clustering results, expclusters + is a Tree object as returned by the treecluster method. For k-means + clustering results, expclusters is a vector containing nexps + integers, describing to which cluster a given sample belongs. This + vector can be calculated by kcluster. + """ + (ngenes, nexps) = numpy.shape(self.data) + if self.gorder is None: + gorder = numpy.arange(ngenes) + else: + gorder = self.gorder + if self.eorder is None: + eorder = numpy.arange(nexps) + else: + eorder = self.eorder + if ( + geneclusters is not None + and expclusters is not None + and type(geneclusters) != type(expclusters) + ): + raise ValueError( + "found one k-means and one hierarchical " + "clustering solution in geneclusters and " + "expclusters" + ) + gid = 0 + aid = 0 + filename = jobname + postfix = "" + if isinstance(geneclusters, Tree): + # This is a hierarchical clustering result. + geneindex = self._savetree(jobname, geneclusters, gorder, False) + gid = 1 + elif geneclusters is not None: + # This is a k-means clustering result. + filename = jobname + "_K" + k = max(geneclusters) + 1 + kggfilename = "%s_K_G%d.kgg" % (jobname, k) + geneindex = self._savekmeans(kggfilename, geneclusters, gorder, False) + postfix = "_G%d" % k + else: + geneindex = numpy.argsort(gorder) + if isinstance(expclusters, Tree): + # This is a hierarchical clustering result. + expindex = self._savetree(jobname, expclusters, eorder, True) + aid = 1 + elif expclusters is not None: + # This is a k-means clustering result. 
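+            # Mirror the gene handling above: derive k from the sample
+            # cluster ids and write the per-sample assignments to a .kag file.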
+ filename = jobname + "_K" + k = max(expclusters) + 1 + kagfilename = "%s_K_A%d.kag" % (jobname, k) + expindex = self._savekmeans(kagfilename, expclusters, eorder, True) + postfix += "_A%d" % k + else: + expindex = numpy.argsort(eorder) + filename = filename + postfix + self._savedata(filename, gid, aid, geneindex, expindex) + + def _savetree(self, jobname, tree, order, transpose): + """Save the hierarchical clustering solution (PRIVATE).""" + if transpose: + extension = ".atr" + keyword = "ARRY" + else: + extension = ".gtr" + keyword = "GENE" + index = tree.sort(order) + nnodes = len(tree) + with open(jobname + extension, "w") as outputfile: + nodeID = [""] * nnodes + nodedist = numpy.array([node.distance for node in tree[:]]) + for nodeindex in range(nnodes): + min1 = tree[nodeindex].left + min2 = tree[nodeindex].right + nodeID[nodeindex] = "NODE%dX" % (nodeindex + 1) + outputfile.write(nodeID[nodeindex]) + outputfile.write("\t") + if min1 < 0: + index1 = -min1 - 1 + outputfile.write(nodeID[index1] + "\t") + nodedist[nodeindex] = max(nodedist[nodeindex], nodedist[index1]) + else: + outputfile.write("%s%dX\t" % (keyword, min1)) + if min2 < 0: + index2 = -min2 - 1 + outputfile.write(nodeID[index2] + "\t") + nodedist[nodeindex] = max(nodedist[nodeindex], nodedist[index2]) + else: + outputfile.write("%s%dX\t" % (keyword, min2)) + outputfile.write(str(1.0 - nodedist[nodeindex])) + outputfile.write("\n") + return index + + def _savekmeans(self, filename, clusterids, order, transpose): + """Save the k-means clustering solution (PRIVATE).""" + if transpose: + label = "ARRAY" + names = self.expid + else: + label = self.uniqid + names = self.geneid + with open(filename, "w") as outputfile: + outputfile.write(label + "\tGROUP\n") + index = numpy.argsort(order) + n = len(names) + sortedindex = numpy.zeros(n, int) + counter = 0 + cluster = 0 + while counter < n: + for j in index: + if clusterids[j] == cluster: + outputfile.write("%s\t%s\n" % (names[j], cluster)) + sortedindex[counter] = j + counter += 1 + cluster += 1 + return sortedindex + + def _savedata(self, jobname, gid, aid, geneindex, expindex): + """Save the clustered data (PRIVATE).""" + if self.genename is None: + genename = self.geneid + else: + genename = self.genename + (ngenes, nexps) = numpy.shape(self.data) + with open(jobname + ".cdt", "w") as outputfile: + if self.mask is not None: + mask = self.mask + else: + mask = numpy.ones((ngenes, nexps), int) + if self.gweight is not None: + gweight = self.gweight + else: + gweight = numpy.ones(ngenes) + if self.eweight is not None: + eweight = self.eweight + else: + eweight = numpy.ones(nexps) + if gid: + outputfile.write("GID\t") + outputfile.write(self.uniqid) + outputfile.write("\tNAME\tGWEIGHT") + # Now add headers for data columns. 
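+            # The columns are written in expindex order, so the .cdt output
+            # follows the clustered ordering of the samples, not the input order.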
+ for j in expindex: + outputfile.write("\t%s" % self.expid[j]) + outputfile.write("\n") + if aid: + outputfile.write("AID") + if gid: + outputfile.write("\t") + outputfile.write("\t\t") + for j in expindex: + outputfile.write("\tARRY%dX" % j) + outputfile.write("\n") + outputfile.write("EWEIGHT") + if gid: + outputfile.write("\t") + outputfile.write("\t\t") + for j in expindex: + outputfile.write("\t%f" % eweight[j]) + outputfile.write("\n") + for i in geneindex: + if gid: + outputfile.write("GENE%dX\t" % i) + outputfile.write( + "%s\t%s\t%f" % (self.geneid[i], genename[i], gweight[i]) + ) + for j in expindex: + outputfile.write("\t") + if mask[i, j]: + outputfile.write(str(self.data[i, j])) + outputfile.write("\n") + + +def read(handle): + """Read gene expression data from the file handle and return a Record. + + The file should be in the file format defined for Michael Eisen's + Cluster/TreeView program. + """ + return Record(handle) + + +# Everything below is private +# + + +def __check_data(data): + if isinstance(data, numpy.ndarray): + data = numpy.require(data, dtype="d", requirements="C") + else: + data = numpy.array(data, dtype="d") + if data.ndim != 2: + raise ValueError("data should be 2-dimensional") + if numpy.isnan(data).any(): + raise ValueError("data contains NaN values") + return data + + +def __check_mask(mask, shape): + if mask is None: + return numpy.ones(shape, dtype="intc") + elif isinstance(mask, numpy.ndarray): + return numpy.require(mask, dtype="intc", requirements="C") + else: + return numpy.array(mask, dtype="intc") + + +def __check_weight(weight, ndata): + if weight is None: + return numpy.ones(ndata, dtype="d") + if isinstance(weight, numpy.ndarray): + weight = numpy.require(weight, dtype="d", requirements="C") + else: + weight = numpy.array(weight, dtype="d") + if numpy.isnan(weight).any(): + raise ValueError("weight contains NaN values") + return weight + + +def __check_initialid(initialid, npass, nitems): + if initialid is None: + if npass <= 0: + raise ValueError("npass should be a positive integer") + clusterid = numpy.empty(nitems, dtype="intc") + else: + npass = 0 + clusterid = numpy.array(initialid, dtype="intc") + return clusterid, npass + + +def __check_index(index): + if index is None: + return numpy.zeros(1, dtype="intc") + elif isinstance(index, numbers.Integral): + return numpy.array([index], dtype="intc") + elif isinstance(index, numpy.ndarray): + return numpy.require(index, dtype="intc", requirements="C") + else: + return numpy.array(index, dtype="intc") + + +def __check_distancematrix(distancematrix): + if distancematrix is None: + return distancematrix + if isinstance(distancematrix, numpy.ndarray): + distancematrix = numpy.require(distancematrix, dtype="d", requirements="C") + else: + try: + distancematrix = numpy.array(distancematrix, dtype="d") + except ValueError: + n = len(distancematrix) + d = [None] * n + for i, row in enumerate(distancematrix): + if isinstance(row, numpy.ndarray): + row = numpy.require(row, dtype="d", requirements="C") + else: + row = numpy.array(row, dtype="d") + if row.ndim != 1: + raise ValueError("row %d is not one-dimensional" % i) from None + m = len(row) + if m != i: + raise ValueError( + "row %d has incorrect size (%d, expected %d)" % (i, m, i) + ) from None + if numpy.isnan(row).any(): + raise ValueError("distancematrix contains NaN values") from None + d[i] = row + return d + if numpy.isnan(distancematrix).any(): + raise ValueError("distancematrix contains NaN values") + return distancematrix diff --git 
a/code/lib/Bio/Cluster/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/Cluster/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000..0a45e9e
Binary files /dev/null and b/code/lib/Bio/Cluster/__pycache__/__init__.cpython-37.pyc differ
diff --git a/code/lib/Bio/Cluster/_cluster.cp37-win_amd64.pyd b/code/lib/Bio/Cluster/_cluster.cp37-win_amd64.pyd
new file mode 100644
index 0000000..e000cce
Binary files /dev/null and b/code/lib/Bio/Cluster/_cluster.cp37-win_amd64.pyd differ
diff --git a/code/lib/Bio/Cluster/cluster.c b/code/lib/Bio/Cluster/cluster.c
new file mode 100644
index 0000000..89db792
--- /dev/null
+++ b/code/lib/Bio/Cluster/cluster.c
@@ -0,0 +1,5061 @@
+/* The C clustering library.
+ * Copyright (C) 2002 Michiel Jan Laurens de Hoon.
+ *
+ * This library was written at the Laboratory of DNA Information Analysis,
+ * Human Genome Center, Institute of Medical Science, University of Tokyo,
+ * 4-6-1 Shirokanedai, Minato-ku, Tokyo 108-8639, Japan.
+ * Contact: michiel.dehoon 'AT' riken.jp
+ *
+ * Permission to use, copy, modify, and distribute this software and its
+ * documentation with or without modifications and for any purpose and
+ * without fee is hereby granted, provided that any copyright notices
+ * appear in all copies and that both those copyright notices and this
+ * permission notice appear in supporting documentation, and that the
+ * names of the contributors or copyright holders not be used in
+ * advertising or publicity pertaining to distribution of the software
+ * without specific prior permission.
+ *
+ * THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL
+ * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE
+ * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT
+ * OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+ * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
+ * OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <math.h>
+#include <float.h>
+#include <limits.h>
+#include "cluster.h"
+
+/* ************************************************************************ */
+/* SORTING FUNCTIONS */
+/*
+* C qsort() is very slow, much slower than C++ std::sort().
+* This is because qsort() doesn't utilize data-type information at compile time,
+* and it has redundant pointer dereference since it requires a compare function.
+* For projects that use old C, it's impossible to convert to C++/newer C.
+*
+* So we implement a simple quicksort that is ~25% faster than std::sort()
+* with mostly random data, and much faster with structured/sorted data
+*/
+
+static const int INF = INT_MAX; // 2^31 - 1
+
+static int TEMP_SWAP_INT;
+#define swap_int(x,y) {TEMP_SWAP_INT = (x); (x) = (y); (y) = TEMP_SWAP_INT;}
+
+/* For quicksort, we need to choose a random pivot. Any random function should work. Even bad ones.
*/ +static int +cheap_random() +{ + const int base = 2 * 100 * 1000 * 1000 + 33; + static int seed = 0; + seed = seed * 7 + 13; + if (seed > base) seed %= base; + return seed; +} + +static inline int +median_index_of3_index(const double arr[], int index[], const int a, const int b, const int c) +{ + if (arr[index[a]] < arr[index[b]]) { + if (arr[index[b]] < arr[index[c]]) return b; + else if (arr[index[a]] < arr[index[c]]) return c; + else return a; + } + else { + if (arr[index[a]] < arr[index[c]]) return a; + else if (arr[index[b]] < arr[index[c]]) return c; + else return b; + } +} + + +/* Insertion sort is best when the array is small. */ +static void +insertion_sort_index(const double a[], int index[], int l, int r) +{ + int i, j, current_index; + double value; + + if (r <= l) return; + i = l; j = r; + value = a[index[(l + r) >> 1]]; + while (i <= j) { + while (a[index[i]] < value) i++; + while (a[index[j]] > value) j--; + + if (i <= j) { + swap_int(index[i], index[j]); + i++; + j--; + } + } + + for (i = l + 1; i <= r; i++) { + j = i - 1; + value = a[index[i]]; + current_index = index[i]; + + while (j >= l && a[index[j]] > value) { + index[j + 1] = index[j]; + j--; + } + index[j + 1] = current_index; + } +} + +//*************** +static void +fastsort_partition_index(const double a[], int index[], const int left, const int right, int* first_end_ptr, int* second_start_ptr) { + int low, high, i, pivot, mid; + double value; + int increasing = 1, decreasing = 1; + + /*******/ + /* choose a random way to choose pivot, to prevent all possible worst-cases*/ + if ((right - left) & 1) pivot = left + cheap_random() % (right - left); + else pivot = median_index_of3_index(a, index, left, (left + right) >> 1, right); + value = a[index[pivot]]; + + /*******/ + /* Skip through smaller values on left and larger values on right*/ + low = left; high = right; + while (a[index[low]] < value) { + low++; + decreasing = 0; + if (a[index[low]] < a[index[low - 1]]) increasing = 0; + } + + while (a[index[high]] > value) { + high--; + decreasing = 0; + if (a[index[high]] > a[index[high + 1]]) increasing = 0; + } + + increasing &= a[index[high]] >= a[index[low]]; + decreasing &= a[index[high]] <= a[index[low]]; + + /*******/ + /* Resolve degenerate input cases */ + if (increasing) { + if ((right - left) & 1) { + for (i = low + 1; i <= high; i++) if (a[index[i]] < a[index[i - 1]]) { + increasing = 0; + break; + } + } + else { + for (i = high; i >= low + 1; i--) if (a[index[i]] < a[index[i - 1]]) { + increasing = 0; + break; + } + } + if (increasing) { /* sorted */ + *first_end_ptr = INF; + return; + } + } + + if (decreasing) { + if ((right - left) & 1) { + for (i = low + 1; i <= high; i++) if (a[index[i]] > a[index[i - 1]]) { + decreasing = 0; + break; + } + } + else { + for (i = high; i >= low + 1; i--) if (a[index[i]] > a[index[i - 1]]) { + decreasing = 0; + break; + } + } + if (decreasing) { + mid = (right - left + 1) >> 1; + for (i = 0; i < mid; i++) swap_int(index[left + i], index[right - i]); + *first_end_ptr = INF; + return; + } + } + + /******/ + while (low <= high) { + while (a[index[low]] < value) low++; + while (a[index[high]] > value) high--; + + if (low <= high) { + swap_int(index[low], index[high]); + low++; + high--; + } + } + + *first_end_ptr = high; + *second_start_ptr = low; +} + +//*************** +static void +fastsort_recursive_index(const double a[], int index[], int l, int r) +{ + int first_end, second_start; + while (l < r) { + if (r - l <= 70) { /* determined through experiments and 
benchmarks, not randomly; anything in the 70-150 range works fine on
random/mixed (hard) data */
+            insertion_sort_index(a, index, l, r);
+            return;
+        }
+
+        fastsort_partition_index(a, index, l, r, &first_end, &second_start);
+        if (first_end == INF) return; /* sorted */
+
+        /* Recurse into the smaller branch to avoid stack overflow */
+        if (first_end - l < r - second_start) {
+            fastsort_recursive_index(a, index, l, first_end);
+            l = second_start;
+        }
+        else {
+            fastsort_recursive_index(a, index, second_start, r);
+            r = first_end;
+        }
+    }
+}
+
+/* ************************************************************************ */
+
+double
+mean(int n, double a[])
+/*
+   Add 4 elements at once instead of 1. The advantages are:
+   1. less loop overhead;
+   2. compiled with -O2, the loop can be vectorized with SSE/AVX;
+   3. even without AVX it is still faster, because the 4 independent
+      additions can be executed in parallel;
+   4. smaller floating-point error.
+*/
+{
+    double result = 0.;
+    int i;
+    double sum[4] = {0., 0., 0., 0.};
+
+    int nstep4 = n - n % 4;
+    for (i = 0; i < nstep4; i += 4) {
+        sum[0] += a[i];
+        sum[1] += a[i + 1];
+        sum[2] += a[i + 2];
+        sum[3] += a[i + 3];
+    }
+
+    for (i = nstep4; i < n; i++) result += a[i];
+    result += (sum[0] + sum[1]) + (sum[2] + sum[3]);
+
+    return result / n;
+}
+
+/* ************************************************************************ */
+
+double
+median(int n, double x[])
+/*
+Find the median of X(1), ... , X(N), using as much of the quicksort
+algorithm as is needed to isolate it.
+N.B. On exit, the array X is partially ordered.
+Based on Alan J. Miller's median.f90 routine.
+*/
+
+{
+    int i, j;
+    int nr = n / 2;
+    int nl = nr - 1;
+    int even = 0;
+    /* hi & lo are position limits encompassing the median. */
+    int lo = 0;
+    int hi = n-1;
+
+    if (n == 2*nr) even = 1;
+    if (n < 3) {
+        if (n < 1) return 0.;
+        if (n == 1) return x[0];
+        return 0.5*(x[0]+x[1]);
+    }
+
+    /* Find median of 1st, middle & last values. */
+    do {
+        int loop;
+        int mid = (lo + hi)/2;
+        double result = x[mid];
+        double xlo = x[lo];
+        double xhi = x[hi];
+        if (xhi < xlo) { /* swap xlo and xhi */
+            double temp = xlo;
+            xlo = xhi;
+            xhi = temp;
+        }
+        if (result > xhi) result = xhi;
+        else if (result < xlo) result = xlo;
+        /* The basic quicksort algorithm to move all values <= the sort key
+         * to the left-hand end, and all higher values to the other end.
+         */
+        i = lo;
+        j = hi;
+        do {
+            while (x[i] < result) i++;
+            while (x[j] > result) j--;
+            loop = 0;
+            if (i < j) {
+                double temp = x[i];
+                x[i] = x[j];
+                x[j] = temp;
+                i++;
+                j--;
+                if (i <= j) loop = 1;
+            }
+        } while (loop);
+        /* Decide which half the median is in. */
+        if (even) {
+            if (j == nl && i == nr) {
+                /* Special case: n is even, j = n/2 and i = j + 1, so the
+                 * median is between the two halves of the series. Find the
+                 * maximum of the first half and the minimum of the second
+                 * half, then average them.
+                 */
+                int k;
+                double xmax = x[0];
+                double xmin = x[n-1];
+                for (k = lo; k <= j; k++) if (x[k] > xmax) xmax = x[k];
+                for (k = i; k <= hi; k++) if (x[k] < xmin) xmin = x[k];
+                return 0.5*(xmin + xmax);
+            }
+            if (j < nl) lo = i;
+            if (i > nr) hi = j;
+            if (i == j) {
+                if (i == nl) lo = nl;
+                if (j == nr) hi = nr;
+            }
+        }
+        else {
+            if (j < nr) lo = i;
+            if (i > nr) hi = j;
+            /* Test whether median has been isolated. */
+            if (i == j && i == nr) return result;
+        }
+    }
+    while (lo < hi - 1);
+
+    if (even) return (0.5*(x[nl]+x[nr]));
+    if (x[lo] > x[hi]) {
+        double temp = x[lo];
+        x[lo] = x[hi];
+        x[hi] = temp;
+    }
+    return x[nr];
+}
+
+/* ********************************************************************** */
+
+void
+sort_index(int n, const double data[], int index[])
+/* Sets up an index table given the data, such that data[index[]] is in
+ * increasing order. Sorting is done on the indices; the array data
+ * is unchanged.
+ */
+{
+    int i;
+    for (i = 0; i < n; i++) index[i] = i;
+    fastsort_recursive_index(data, index, 0, n - 1);
+}
+
+/* ********************************************************************** */
+
+static double*
+getrank(int n, const double data[], const double weight[])
+/* Calculates the ranks of the elements in the array data. Two elements with
+ * the same value get the same rank, equal to the average of the ranks they
+ * would have received had their values differed. The ranks are returned as
+ * a newly allocated array that should be freed by the calling routine. If
+ * getrank fails due to a memory allocation error, it returns NULL.
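+ * For example, with unit weights the data {2.0, 1.0, 2.0} receive the ranks
+ * {2.5, 1.0, 2.5}: the two tied elements share the average of ranks 2 and 3.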
+ */
+{
+    int i, j, k, l;
+    double* rank;
+    int* index;
+    double total = 0.0;
+    double subtotal;
+    double current;
+    double value;
+
+    rank = malloc(n*sizeof(double));
+    if (!rank) return NULL;
+    index = malloc(n*sizeof(int));
+    if (!index) {
+        free(rank);
+        return NULL;
+    }
+    /* Call sort to get an index table */
+    sort_index(n, data, index);
+    /* Build a rank table */
+    k = 0;
+    j = index[0];
+    current = data[j];
+    subtotal = weight[j];
+    for (i = 1; i < n; i++) {
+        j = index[i];
+        value = data[j];
+        if (value != current) {
+            current = value;
+            value = total + (subtotal + 1.0) / 2.0;
+            for (l = k; l < i; l++) rank[index[l]] = value;
+            k = i;
+            total += subtotal;
+            subtotal = 0.0;
+        }
+        subtotal += weight[j];
+    }
+    value = total + (subtotal + 1.0) / 2.0;
+    for (l = k; l < i; l++) rank[index[l]] = value;
+    free(index);
+    return rank;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int
+makedatamask(int nrows, int ncols, double*** pdata, int*** pmask)
+{
+    int i;
+    double** data;
+    int** mask;
+
+    data = malloc(nrows*sizeof(double*));
+    if (!data) return 0;
+    mask = malloc(nrows*sizeof(int*));
+    if (!mask) {
+        free(data);
+        return 0;
+    }
+    for (i = 0; i < nrows; i++) {
+        data[i] = malloc(ncols*sizeof(double));
+        if (!data[i]) break;
+        mask[i] = malloc(ncols*sizeof(int));
+        if (!mask[i]) {
+            free(data[i]);
+            break;
+        }
+    }
+    if (i == nrows) { /* break not encountered */
+        *pdata = data;
+        *pmask = mask;
+        return 1;
+    }
+    *pdata = NULL;
+    *pmask = NULL;
+    nrows = i;
+    for (i = 0; i < nrows; i++) {
+        free(data[i]);
+        free(mask[i]);
+    }
+    free(data);
+    free(mask);
+    return 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static void
+freedatamask(int n, double** data, int** mask)
+{
+    int i;
+
+    for (i = 0; i < n; i++) {
+        free(mask[i]);
+        free(data[i]);
+    }
+    free(mask);
+    free(data);
+}
+
+/* ---------------------------------------------------------------------- */
+
+static double
+find_closest_pair(int n, double** distmatrix, int* ip, int* jp)
+/*
+This function searches the distance matrix to find the pair with the shortest
+distance between them. The indices of the pair are returned in ip and jp; the
+distance itself is returned by the function.
+
+n (input) int
+The number of elements in the distance matrix.
+
+distmatrix (input) double**
+A ragged array containing the distance matrix. The number of columns in each
+row is equal to the row index, so the first row (row 0) is empty.
+
+ip (output) int*
+A pointer to the integer that is to receive the first index of the pair with
+the shortest distance.
+
+jp (output) int*
+A pointer to the integer that is to receive the second index of the pair with
+the shortest distance.
+*/
+{
+    int i, j;
+    double temp;
+    double distance = distmatrix[1][0];
+
+    *ip = 1;
+    *jp = 0;
+    for (i = 1; i < n; i++) {
+        for (j = 0; j < i; j++) {
+            temp = distmatrix[i][j];
+            if (temp < distance) {
+                distance = temp;
+                *ip = i;
+                *jp = j;
+            }
+        }
+    }
+    return distance;
+}
+
+/* ********************************************************************* */
+
+static int
+svd(int m, int n, double** u, double w[], double** vt)
+/*
+Purpose
+=======
+
+This subroutine computes the singular value decomposition of a real m by n
+rectangular matrix, using Householder bidiagonalization followed by a variant
+of the QR algorithm, after the Algol procedure svd by Golub and Reinsch
+(Numer. Math. 14, 403-420, 1970). On return, w contains the singular values,
+u is overwritten with the left singular vectors, and vt contains the right
+singular vectors. The function returns 0 if successful, -1 if memory
+allocation fails, and a positive integer if the QR iteration fails to
+converge.
+*/
+{
+    int i, j, k, i1, k1, l1, its;
+    double c, f, h, s, x, y, z;
+    int l = 0;
+    int ierr = 0;
+    double g = 0.0;
+    double scale = 0.0;
+    double anorm = 0.0;
+    double* rv1 = malloc(n*sizeof(double));
+
+    if (!rv1) return -1;
+    if (m >= n) {
+        /* Householder reduction to bidiagonal form */
+        for (i = 0; i < n; i++) {
+            l = i + 1;
+            rv1[i] = scale * g;
+            g = 0.0;
+            s = 0.0;
+            scale = 0.0;
+            for (k = i; k < m; k++) scale += fabs(u[k][i]);
+            if (scale != 0.0) {
+                for (k = i; k < m; k++) {
+                    u[k][i] /= scale;
+                    s += u[k][i]*u[k][i];
+                }
+                f = u[i][i];
+                g = (f >= 0) ? -sqrt(s) : sqrt(s);
+                h = f * g - s;
+                u[i][i] = f - g;
+                if (i < n-1) {
+                    for (j = l; j < n; j++) {
+                        s = 0.0;
+                        for (k = i; k < m; k++) s += u[k][i] * u[k][j];
+                        f = s / h;
+                        for (k = i; k < m; k++) u[k][j] += f * u[k][i];
+                    }
+                }
+                for (k = i; k < m; k++) u[k][i] *= scale;
+            }
+            w[i] = scale * g;
+            g = 0.0;
+            s = 0.0;
+            scale = 0.0;
+            if (i < m && i != n-1) {
+                for (k = l; k < n; k++) scale += fabs(u[i][k]);
+                if (scale != 0.0) {
+                    for (k = l; k < n; k++) {
+                        u[i][k] /= scale;
+                        s += u[i][k]*u[i][k];
+                    }
+                    f = u[i][l];
+                    g = (f >= 0) ? -sqrt(s) : sqrt(s);
+                    h = f * g - s;
+                    u[i][l] = f - g;
+                    for (k = l; k < n; k++) rv1[k] = u[i][k] / h;
+                    for (j = l; j < m; j++) {
+                        s = 0.0;
+                        for (k = l; k < n; k++) s += u[j][k] * u[i][k];
+                        for (k = l; k < n; k++) u[j][k] += s * rv1[k];
+                    }
+                    for (k = l; k < n; k++) u[i][k] *= scale;
+                }
+            }
+            anorm = max(anorm, fabs(w[i])+fabs(rv1[i]));
+        }
+        /* accumulation of right-hand transformations */
+        for (i = n-1; i >= 0; i--) {
+            if (i < n-1) {
+                if (g != 0.0) {
+                    for (j = l; j < n; j++) vt[i][j] = (u[i][j] / u[i][l]) / g;
+                    /* double division avoids possible underflow */
+                    for (j = l; j < n; j++) {
+                        s = 0.0;
+                        for (k = l; k < n; k++) s += u[i][k] * vt[j][k];
+                        for (k = l; k < n; k++) vt[j][k] += s * vt[i][k];
+                    }
+                }
+            }
+            for (j = l; j < n; j++) {
+                vt[j][i] = 0.0;
+                vt[i][j] = 0.0;
+            }
+            vt[i][i] = 1.0;
+            g = rv1[i];
+            l = i;
+        }
+        /* accumulation of left-hand transformations */
+        for (i = n-1; i >= 0; i--) {
+            l = i + 1;
+            g = w[i];
+            if (i != n-1)
+                for (j = l; j < n; j++) u[i][j] = 0.0;
+            if (g != 0.0) {
+                if (i != n-1) {
+                    for (j = l; j < n; j++) {
+                        s = 0.0;
+                        for (k = l; k < m; k++) s += u[k][i] * u[k][j];
+                        /* double division avoids possible underflow */
+                        f = (s / u[i][i]) / g;
+                        for (k = i; k < m; k++) u[k][j] += f * u[k][i];
+                    }
+                }
+                for (j = i; j < m; j++) u[j][i] /= g;
+            }
+            else
+                for (j = i; j < m; j++) u[j][i] = 0.0;
+            u[i][i] += 1.0;
+        }
+        /* diagonalization of the bidiagonal form */
+        for (k = n-1; k >= 0; k--) {
+            k1 = k-1;
+            its = 0;
+            while (1) {
+                /* test for splitting */
+                for (l = k; l >= 0; l--) {
+                    l1 = l-1;
+                    if (fabs(rv1[l]) + anorm == anorm) break;
+                    /* rv1[0] is always zero, so there is no exit
+                     * through the bottom of the loop */
+                    if (fabs(w[l1]) + anorm == anorm) {
+                        /* cancellation of rv1[l] if l greater than 0 */
+                        c = 0.0;
+                        s = 1.0;
+                        for (i = l; i <= k; i++) {
+                            f = s * rv1[i];
+                            rv1[i] *= c;
+                            if (fabs(f) + anorm == anorm) break;
+                            g = w[i];
+                            h = sqrt(f*f+g*g);
+                            w[i] = h;
+                            c = g / h;
+                            s = -f / h;
+                            for (j = 0; j < m; j++) {
+                                y = u[j][l1];
+                                z = u[j][i];
+                                u[j][l1] = y * c + z * s;
+                                u[j][i] = -y * s + z * c;
+                            }
+                        }
+                        break;
+                    }
+                }
+                /* test for convergence */
+                z = w[k];
+                if (l == k) { /* convergence */
+                    if (z < 0.0) {
+                        /* w[k] is made non-negative */
+                        w[k] = -z;
+                        for (j = 0; j < n; j++) vt[k][j] = -vt[k][j];
+                    }
+                    break;
+                }
+                else if (its == 30) {
+                    ierr = k;
+                    break;
+                }
+                else {
+                    /* shift from bottom 2 by 2 minor */
+                    its++;
+                    x = w[l];
+                    y = w[k1];
+                    g = rv1[k1];
+                    h = rv1[k];
+                    f = ((y - z) * (y + z) + (g - h) * (g + h)) / (2.0*h*y);
+                    g = sqrt(f*f+1.0);
+                    f = ((x - z) * (x + z) +
+                         h * (y / (f + (f >= 0 ? g : -g)) - h)) / x;
+                    /* next qr transformation */
+                    c = 1.0;
+                    s = 1.0;
+                    for (i1 = l; i1 <= k1; i1++) {
+                        i = i1 + 1;
+                        g = rv1[i];
+                        y = w[i];
+                        h = s * g;
+                        g = c * g;
+                        z = sqrt(f*f+h*h);
+                        rv1[i1] = z;
+                        c = f / z;
+                        s = h / z;
+                        f = x * c + g * s;
+                        g = -x * s + g * c;
+                        h = y * s;
+                        y = y * c;
+                        for (j = 0; j < n; j++) {
+                            x = vt[i1][j];
+                            z = vt[i][j];
+                            vt[i1][j] = x * c + z * s;
+                            vt[i][j] = -x * s + z * c;
+                        }
+                        z = sqrt(f*f+h*h);
+                        w[i1] = z;
+                        /* rotation can be arbitrary if z is zero */
+                        if (z != 0.0) {
+                            c = f / z;
+                            s = h / z;
+                        }
+                        f = c * g + s * y;
+                        x = -s * g + c * y;
+                        for (j = 0; j < m; j++) {
+                            y = u[j][i1];
+                            z = u[j][i];
+                            u[j][i1] = y * c + z * s;
+                            u[j][i] = -y * s + z * c;
+                        }
+                    }
+                    rv1[l] = 0.0;
+                    rv1[k] = f;
+                    w[k] = x;
+                }
+            }
+        }
+    }
+    else /* m < n */ {
+        /* Householder reduction to bidiagonal form */
+        for (i = 0; i < m; i++) {
+            l = i + 1;
+            rv1[i] = scale * g;
+            g = 0.0;
+            s = 0.0;
+            scale = 0.0;
+            for (k = i; k < n; k++) scale += fabs(u[i][k]);
+            if (scale != 0.0) {
+                for (k = i; k < n; k++) {
+                    u[i][k] /= scale;
+                    s += u[i][k]*u[i][k];
+                }
+                f = u[i][i];
+                g = (f >= 0) ? -sqrt(s) : sqrt(s);
+                h = f * g - s;
+                u[i][i] = f - g;
+                if (i < m-1) {
+                    for (j = l; j < m; j++) {
+                        s = 0.0;
+                        for (k = i; k < n; k++) s += u[i][k] * u[j][k];
+                        f = s / h;
+                        for (k = i; k < n; k++) u[j][k] += f * u[i][k];
+                    }
+                }
+                for (k = i; k < n; k++) u[i][k] *= scale;
+            }
+            w[i] = scale * g;
+            g = 0.0;
+            s = 0.0;
+            scale = 0.0;
+            if (i < n && i != m-1) {
+                for (k = l; k < m; k++) scale += fabs(u[k][i]);
+                if (scale != 0.0) {
+                    for (k = l; k < m; k++) {
+                        u[k][i] /= scale;
+                        s += u[k][i]*u[k][i];
+                    }
+                    f = u[l][i];
+                    g = (f >= 0) ? -sqrt(s) : sqrt(s);
+                    h = f * g - s;
+                    u[l][i] = f - g;
+                    for (k = l; k < m; k++) rv1[k] = u[k][i] / h;
+                    for (j = l; j < n; j++) {
+                        s = 0.0;
+                        for (k = l; k < m; k++) s += u[k][j] * u[k][i];
+                        for (k = l; k < m; k++) u[k][j] += s * rv1[k];
+                    }
+                    for (k = l; k < m; k++) u[k][i] *= scale;
+                }
+            }
+            anorm = max(anorm, fabs(w[i])+fabs(rv1[i]));
+        }
+        /* accumulation of right-hand transformations */
+        for (i = m-1; i >= 0; i--) {
+            if (i < m-1) {
+                if (g != 0.0) {
+                    for (j = l; j < m; j++) vt[j][i] = (u[j][i] / u[l][i]) / g;
+                    /* double division avoids possible underflow */
+                    for (j = l; j < m; j++) {
+                        s = 0.0;
+                        for (k = l; k < m; k++) s += u[k][i] * vt[k][j];
+                        for (k = l; k < m; k++) vt[k][j] += s * vt[k][i];
+                    }
+                }
+            }
+            for (j = l; j < m; j++) {
+                vt[i][j] = 0.0;
+                vt[j][i] = 0.0;
+            }
+            vt[i][i] = 1.0;
+            g = rv1[i];
+            l = i;
+        }
+        /* accumulation of left-hand transformations */
+        for (i = m-1; i >= 0; i--) {
+            l = i + 1;
+            g = w[i];
+            if (i != m-1)
+                for (j = l; j < m; j++) u[j][i] = 0.0;
+            if (g != 0.0) {
+                if (i != m-1) {
+                    for (j = l; j < m; j++) {
+                        s = 0.0;
+                        for (k = l; k < n; k++) s += u[i][k] * u[j][k];
+                        /* double division avoids possible underflow */
+                        f = (s / u[i][i]) / g;
+                        for (k = i; k < n; k++) u[j][k] += f * u[i][k];
+                    }
+                }
+                for (j = i; j < n; j++) u[i][j] /= g;
+            }
+            else
+                for (j = i; j < n; j++) u[i][j] = 0.0;
+            u[i][i] += 1.0;
+        }
+        /* diagonalization of the bidiagonal form */
+        for (k = m-1; k >= 0; k--) {
+            k1 = k-1;
+            its = 0;
+            while (1) {
+                /* test for splitting */
+                for (l = k; l >= 0; l--) {
+                    l1 = l-1;
+                    if (fabs(rv1[l]) + anorm == anorm) break;
+                    /* rv1[0] is always zero, so there is no exit
+                     * through the bottom of the loop */
+                    if (fabs(w[l1]) + anorm == anorm) {
+                        /* cancellation of rv1[l] if l greater than 0 */
+                        c = 0.0;
+                        s = 1.0;
+                        for (i = l; i <= k; i++) {
+                            f = s * rv1[i];
+                            rv1[i] *= c;
+                            if (fabs(f) + anorm == anorm) break;
+                            g = w[i];
+                            h = sqrt(f*f+g*g);
+                            w[i] = h;
+                            c = g / h;
+                            s = -f / h;
+                            for (j = 0; j < n; j++) {
+                                y = u[l1][j];
+                                z = u[i][j];
+                                u[l1][j] = y * c + z * s;
+                                u[i][j]
= -y * s + z * c; + } + } + break; + } + } + /* test for convergence */ + z = w[k]; + if (l == k) /* convergence */ { + if (z < 0.0) { + /* w[k] is made non-negative */ + w[k] = -z; + for (j = 0; j < m; j++) vt[j][k] = -vt[j][k]; + } + break; + } + else if (its == 30) { + ierr = k; + break; + } + else { + /* shift from bottom 2 by 2 minor */ + its++; + x = w[l]; + y = w[k1]; + g = rv1[k1]; + h = rv1[k]; + f = ((y - z) * (y + z) + + (g - h) * (g + h)) / (2.0 * h * y); + g = sqrt(f*f+1.0); + f = ((x - z) * (x + z) + + h * (y / (f + (f >= 0 ? g : -g)) - h)) / x; + /* next qr transformation */ + c = 1.0; + s = 1.0; + for (i1 = l; i1 <= k1; i1++) { + i = i1 + 1; + g = rv1[i]; + y = w[i]; + h = s * g; + g = c * g; + z = sqrt(f*f+h*h); + rv1[i1] = z; + c = f / z; + s = h / z; + f = x * c + g * s; + g = -x * s + g * c; + h = y * s; + y = y * c; + for (j = 0; j < m; j++) { + x = vt[j][i1]; + z = vt[j][i]; + vt[j][i1] = x * c + z * s; + vt[j][i] = -x * s + z * c; + } + z = sqrt(f*f+h*h); + w[i1] = z; + /* rotation can be arbitrary if z is zero */ + if (z != 0.0) { + c = f / z; + s = h / z; + } + f = c * g + s * y; + x = -s * g + c * y; + for (j = 0; j < n; j++) { + y = u[i1][j]; + z = u[i][j]; + u[i1][j] = y * c + z * s; + u[i][j] = -y * s + z * c; + } + } + rv1[l] = 0.0; + rv1[k] = f; + w[k] = x; + } + } + } + } + free(rv1); + return ierr; +} + +/* ********************************************************************* */ + +int +pca(int nrows, int ncolumns, double** u, double** v, double* w) +/* +Purpose +======= + +This subroutine uses the singular value decomposition to perform principal +components analysis of a real nrows by ncolumns rectangular matrix. + +Arguments +========= + +nrows (input) int +The number of rows in the matrix u. + +ncolumns (input) int +The number of columns in the matrix v. + +u (input) double[nrows][ncolumns] +On input, the array containing the data to which the principal component +analysis should be applied. The function assumes that the mean has already been +subtracted of each column, and hence that the mean of each column is zero. +On output, see below. + +v (input) double[n][n], where n = min(nrows, ncolumns) +Not used on input. + +w (input) double[n], where n = min(nrows, ncolumns) +Not used on input. + + +Return value +============ + +On output: + +If nrows >= ncolumns, then + +u contains the coordinates with respect to the principal components; +v contains the principal component vectors. + +The dot product u . v reproduces the data that were passed in u. + + +If nrows < ncolumns, then + +u contains the principal component vectors; +v contains the coordinates with respect to the principal components. + +The dot product v . u reproduces the data that were passed in u. + +The eigenvalues of the covariance matrix are returned in w. + +The arrays u, v, and w are sorted according to eigenvalue, with the largest +eigenvalues appearing first. + +The function returns 0 if successful, -1 if memory allocation fails, and a +positive integer if the singular value decomposition fails to converge. 
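+
+Example
+=======
+
+A minimal calling sketch for the nrows >= ncolumns case (allocation checks
+and frees omitted for brevity; all names here are illustrative only):
+
+    int i;
+    int nrows = 5, ncolumns = 3;
+    double** u = malloc(nrows*sizeof(double*));
+    double** v = malloc(ncolumns*sizeof(double*));
+    double* w = malloc(ncolumns*sizeof(double));
+    for (i = 0; i < nrows; i++) u[i] = calloc(ncolumns, sizeof(double));
+    for (i = 0; i < ncolumns; i++) v[i] = calloc(ncolumns, sizeof(double));
+    /* fill u with column-centered data, then: */
+    if (pca(nrows, ncolumns, u, v, w) == 0) {
+        /* u now holds the coordinates with respect to the principal
+         * components, v the principal component vectors, and w the
+         * eigenvalues, sorted by decreasing eigenvalue. */
+    }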
+*/ +{ + int i; + int j; + int error; + int* index = malloc(ncolumns*sizeof(int)); + double* temp = malloc(ncolumns*sizeof(double)); + + if (!index || !temp) { + if (index) free(index); + if (temp) free(temp); + return -1; + } + error = svd(nrows, ncolumns, u, w, v); + if (error == 0) { + if (nrows >= ncolumns) { + for (j = 0; j < ncolumns; j++) { + const double s = w[j]; + for (i = 0; i < nrows; i++) u[i][j] *= s; + } + sort_index(ncolumns, w, index); + for (i = 0; i < ncolumns/2; i++) { + j = index[i]; + index[i] = index[ncolumns-1-i]; + index[ncolumns-1-i] = j; + } + for (i = 0; i < nrows; i++) { + for (j = 0; j < ncolumns; j++) temp[j] = u[i][index[j]]; + for (j = 0; j < ncolumns; j++) u[i][j] = temp[j]; + } + for (i = 0; i < ncolumns; i++) { + for (j = 0; j < ncolumns; j++) temp[j] = v[index[j]][i]; + for (j = 0; j < ncolumns; j++) v[j][i] = temp[j]; + } + for (i = 0; i < ncolumns; i++) temp[i] = w[index[i]]; + for (i = 0; i < ncolumns; i++) w[i] = temp[i]; + } + else /* nrows < ncolumns */ { + for (j = 0; j < nrows; j++) { + const double s = w[j]; + for (i = 0; i < nrows; i++) v[i][j] *= s; + } + sort_index(nrows, w, index); + for (i = 0; i < nrows/2; i++) { + j = index[i]; + index[i] = index[nrows-1-i]; + index[nrows-1-i] = j; + } + for (j = 0; j < ncolumns; j++) { + for (i = 0; i < nrows; i++) temp[i] = u[index[i]][j]; + for (i = 0; i < nrows; i++) u[i][j] = temp[i]; + } + for (j = 0; j < nrows; j++) { + for (i = 0; i < nrows; i++) temp[i] = v[j][index[i]]; + for (i = 0; i < nrows; i++) v[j][i] = temp[i]; + } + for (i = 0; i < nrows; i++) temp[i] = w[index[i]]; + for (i = 0; i < nrows; i++) w[i] = temp[i]; + } + } + free(index); + free(temp); + return error; +} + +/* ********************************************************************* */ + +static double +euclid(int n, double** data1, double** data2, int** mask1, int** mask2, + const double weight[], int index1, int index2, int transpose) + +/* +Purpose +======= + +The euclid routine calculates the weighted Euclidean distance between two +rows or columns in a matrix. + +Arguments +========= + +n (input) int +The number of elements in a row or column. If transpose == 0, then n is the +number of columns; otherwise, n is the number of rows. + +data1 (input) double array +The data array containing the first vector. + +data2 (input) double array +The data array containing the second vector. + +mask1 (input) int array +This array which elements in data1 are missing. If mask1[i][j] == 0, then +data1[i][j] is missing. + +mask2 (input) int array +This array which elements in data2 are missing. If mask2[i][j] == 0, then +data2[i][j] is missing. + +weight (input) double[ncolumns] if transpose == 0, + double[nrows] otherwise +The weights that are used to calculate the distance. This is equivalent +to including the jth data point weight[j] times in the calculation. The +weights can be non-integer. + +index1 (input) int +Index of the first row or column. + +index2 (input) int +Index of the second row or column. + +transpose (input) int +If transpose == 0, the distance between two rows in the matrix is calculated. +Otherwise, the distance between two columns in the matrix is calculated. 
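+
+Note that, despite its name, this routine returns the weighted mean of the
+squared element-wise differences; the square root is not taken. Pairs with a
+missing value in either vector are skipped, and the sum is normalized by the
+total weight of the pairs actually compared.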
+ +============================================================================ +*/ +{ + double result = 0.0; + double tweight = 0; + int i; + + if (transpose == 0) /* Calculate the distance between two rows */ { + for (i = 0; i < n; i++) { + if (mask1[index1][i] && mask2[index2][i]) { + double term = data1[index1][i] - data2[index2][i]; + result += weight[i]*term*term; + tweight += weight[i]; + } + } + } + else { + for (i = 0; i < n; i++) { + if (mask1[i][index1] && mask2[i][index2]) { + double term = data1[i][index1] - data2[i][index2]; + result += weight[i]*term*term; + tweight += weight[i]; + } + } + } + if (!tweight) return 0; /* usually due to empty clusters */ + result /= tweight; + return result; +} + +/* ********************************************************************* */ + +static double +cityblock(int n, double** data1, double** data2, int** mask1, int** mask2, + const double weight[], int index1, int index2, int transpose) + +/* +Purpose +======= + +The cityblock routine calculates the weighted "City Block" distance between +two rows or columns in a matrix. City Block distance is defined as the +absolute value of X1-X2 plus the absolute value of Y1-Y2 plus..., which is +equivalent to taking an "up and over" path. + +Arguments +========= + +n (input) int +The number of elements in a row or column. If transpose == 0, then n is the +number of columns; otherwise, n is the number of rows. + +data1 (input) double array +The data array containing the first vector. + +data2 (input) double array +The data array containing the second vector. + +mask1 (input) int array +This array which elements in data1 are missing. If mask1[i][j] == 0, then +data1[i][j] is missing. + +mask2 (input) int array +This array which elements in data2 are missing. If mask2[i][j] == 0, then +data2[i][j] is missing. + +weight (input) double[ncolumns] if transpose == 0, + double[nrows] otherwise +The weights that are used to calculate the distance. This is equivalent +to including the jth data point weight[j] times in the calculation. The +weights can be non-integer. + +index1 (input) int +Index of the first row or column. + +index2 (input) int +Index of the second row or column. + +transpose (input) int +If transpose == 0, the distance between two rows in the matrix is calculated. +Otherwise, the distance between two columns in the matrix is calculated. + +============================================================================ */ +{ + double result = 0.; + double tweight = 0; + int i; + + if (transpose == 0) /* Calculate the distance between two rows */ { + for (i = 0; i < n; i++) { + if (mask1[index1][i] && mask2[index2][i]) { + double term = data1[index1][i] - data2[index2][i]; + result = result + weight[i]*fabs(term); + tweight += weight[i]; + } + } + } + else { + for (i = 0; i < n; i++) { + if (mask1[i][index1] && mask2[i][index2]) { + double term = data1[i][index1] - data2[i][index2]; + result = result + weight[i]*fabs(term); + tweight += weight[i]; + } + } + } + if (!tweight) return 0; /* usually due to empty clusters */ + result /= tweight; + return result; +} + +/* ********************************************************************* */ + +static double +correlation(int n, double** data1, double** data2, int** mask1, int** mask2, + const double weight[], int index1, int index2, int transpose) +/* +Purpose +======= + +The correlation routine calculates the weighted Pearson distance between two +rows or columns in a matrix. We define the Pearson distance as one minus the +Pearson correlation. 
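+In other words, d(x,y) = 1 - r(x,y), where r is the weighted Pearson
+correlation of the masked vectors x and y, so the distance ranges from 0 to 2.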
+This definition yields a semi-metric: d(a,b) >= 0, and d(a,b) = 0 iff a = b. +but the triangular inequality d(a,b) + d(b,c) >= d(a,c) does not hold +(e.g., choose b = a + c). + +Arguments +========= + +n (input) int +The number of elements in a row or column. If transpose == 0, then n is the +number of columns; otherwise, n is the number of rows. + +data1 (input) double array +The data array containing the first vector. + +data2 (input) double array +The data array containing the second vector. + +mask1 (input) int array +This array which elements in data1 are missing. If mask1[i][j] == 0, then +data1[i][j] is missing. + +mask2 (input) int array +This array which elements in data2 are missing. If mask2[i][j] == 0, then +data2[i][j] is missing. + +weight (input) double[ncolumns] if transpose == 0, + double[nrows] otherwise +The weights that are used to calculate the distance. This is equivalent +to including the jth data point weight[j] times in the calculation. The +weights can be non-integer. + +index1 (input) int +Index of the first row or column. + +index2 (input) int +Index of the second row or column. + +transpose (input) int +If transpose == 0, the distance between two rows in the matrix is calculated. +Otherwise, the distance between two columns in the matrix is calculated. +============================================================================ +*/ +{ + double result = 0.; + double sum1 = 0.; + double sum2 = 0.; + double denom1 = 0.; + double denom2 = 0.; + double tweight = 0.; + + if (transpose == 0) /* Calculate the distance between two rows */ { + int i; + for (i = 0; i < n; i++) { + if (mask1[index1][i] && mask2[index2][i]) { + double term1 = data1[index1][i]; + double term2 = data2[index2][i]; + double w = weight[i]; + sum1 += w*term1; + sum2 += w*term2; + result += w*term1*term2; + denom1 += w*term1*term1; + denom2 += w*term2*term2; + tweight += w; + } + } + } + else { + int i; + for (i = 0; i < n; i++) { + if (mask1[i][index1] && mask2[i][index2]) { + double term1 = data1[i][index1]; + double term2 = data2[i][index2]; + double w = weight[i]; + sum1 += w*term1; + sum2 += w*term2; + result += w*term1*term2; + denom1 += w*term1*term1; + denom2 += w*term2*term2; + tweight += w; + } + } + } + if (!tweight) return 0; /* usually due to empty clusters */ + result -= sum1 * sum2 / tweight; + denom1 -= sum1 * sum1 / tweight; + denom2 -= sum2 * sum2 / tweight; + if (denom1 <= 0) return 1; /* include '<' to deal with roundoff errors */ + if (denom2 <= 0) return 1; /* include '<' to deal with roundoff errors */ + result = result / sqrt(denom1*denom2); + result = 1. - result; + return result; +} + +/* ********************************************************************* */ + +static double +acorrelation(int n, double** data1, double** data2, int** mask1, int** mask2, + const double weight[], int index1, int index2, int transpose) +/* +Purpose +======= + +The acorrelation routine calculates the weighted Pearson distance between two +rows or columns, using the absolute value of the correlation. +This definition yields a semi-metric: d(a,b) >= 0, and d(a,b) = 0 iff a = b. +but the triangular inequality d(a,b) + d(b,c) >= d(a,c) does not hold +(e.g., choose b = a + c). + +Arguments +========= + +n (input) int +The number of elements in a row or column. If transpose == 0, then n is the +number of columns; otherwise, n is the number of rows. + +data1 (input) double array +The data array containing the first vector. 
+ +data2 (input) double array +The data array containing the second vector. + +mask1 (input) int array +This array which elements in data1 are missing. If mask1[i][j] == 0, then +data1[i][j] is missing. + +mask2 (input) int array +This array which elements in data2 are missing. If mask2[i][j] == 0, then +data2[i][j] is missing. + +weight (input) double[ncolumns] if transpose == 0, + double[nrows] otherwise +The weights that are used to calculate the distance. This is equivalent +to including the jth data point weight[j] times in the calculation. The +weights can be non-integer. + +index1 (input) int +Index of the first row or column. + +index2 (input) int +Index of the second row or column. + +transpose (input) int +If transpose == 0, the distance between two rows in the matrix is calculated. +Otherwise, the distance between two columns in the matrix is calculated. +============================================================================ +*/ +{ + double result = 0.; + double sum1 = 0.; + double sum2 = 0.; + double denom1 = 0.; + double denom2 = 0.; + double tweight = 0.; + + if (transpose == 0) /* Calculate the distance between two rows */ { + int i; + for (i = 0; i < n; i++) { + if (mask1[index1][i] && mask2[index2][i]) { + double term1 = data1[index1][i]; + double term2 = data2[index2][i]; + double w = weight[i]; + sum1 += w*term1; + sum2 += w*term2; + result += w*term1*term2; + denom1 += w*term1*term1; + denom2 += w*term2*term2; + tweight += w; + } + } + } + else { + int i; + for (i = 0; i < n; i++) { + if (mask1[i][index1] && mask2[i][index2]) { + double term1 = data1[i][index1]; + double term2 = data2[i][index2]; + double w = weight[i]; + sum1 += w*term1; + sum2 += w*term2; + result += w*term1*term2; + denom1 += w*term1*term1; + denom2 += w*term2*term2; + tweight += w; + } + } + } + if (!tweight) return 0; /* usually due to empty clusters */ + result -= sum1 * sum2 / tweight; + denom1 -= sum1 * sum1 / tweight; + denom2 -= sum2 * sum2 / tweight; + if (denom1 <= 0) return 1; /* include '<' to deal with roundoff errors */ + if (denom2 <= 0) return 1; /* include '<' to deal with roundoff errors */ + result = fabs(result) / sqrt(denom1*denom2); + result = 1. - result; + return result; +} + +/* ********************************************************************* */ + +static double +ucorrelation(int n, double** data1, double** data2, int** mask1, int** mask2, + const double weight[], int index1, int index2, int transpose) +/* +Purpose +======= + +The ucorrelation routine calculates the weighted Pearson distance between two +rows or columns, using the uncentered version of the Pearson correlation. In +the uncentered Pearson correlation, a zero mean is used for both vectors even +if the actual mean is nonzero. +This definition yields a semi-metric: d(a,b) >= 0, and d(a,b) = 0 iff a = b. +but the triangular inequality d(a,b) + d(b,c) >= d(a,c) does not hold +(e.g., choose b = a + c). + +Arguments +========= + +n (input) int +The number of elements in a row or column. If transpose == 0, then n is the +number of columns; otherwise, n is the number of rows. + +data1 (input) double array +The data array containing the first vector. + +data2 (input) double array +The data array containing the second vector. + +mask1 (input) int array +This array which elements in data1 are missing. If mask1[i][j] == 0, then +data1[i][j] is missing. + +mask2 (input) int array +This array which elements in data2 are missing. If mask2[i][j] == 0, then +data2[i][j] is missing. 
+ +weight (input) double[ncolumns] if transpose == 0, + double[nrows] otherwise +The weights that are used to calculate the distance. This is equivalent +to including the jth data point weight[j] times in the calculation. The +weights can be non-integer. + +index1 (input) int +Index of the first row or column. + +index2 (input) int +Index of the second row or column. + +transpose (input) int +If transpose == 0, the distance between two rows in the matrix is calculated. +Otherwise, the distance between two columns in the matrix is calculated. +============================================================================ +*/ +{ + double result = 0.; + double denom1 = 0.; + double denom2 = 0.; + int flag = 0; + + /* flag will remain zero if no nonzero combinations of mask1 and mask2 are + * found. + */ + if (transpose == 0) /* Calculate the distance between two rows */ { + int i; + for (i = 0; i < n; i++) { + if (mask1[index1][i] && mask2[index2][i]) { + double term1 = data1[index1][i]; + double term2 = data2[index2][i]; + double w = weight[i]; + result += w*term1*term2; + denom1 += w*term1*term1; + denom2 += w*term2*term2; + flag = 1; + } + } + } + else { + int i; + for (i = 0; i < n; i++) { + if (mask1[i][index1] && mask2[i][index2]) { + double term1 = data1[i][index1]; + double term2 = data2[i][index2]; + double w = weight[i]; + result += w*term1*term2; + denom1 += w*term1*term1; + denom2 += w*term2*term2; + flag = 1; + } + } + } + if (!flag) return 0.; + if (denom1 == 0.) return 1.; + if (denom2 == 0.) return 1.; + result = result / sqrt(denom1*denom2); + result = 1. - result; + return result; +} + +/* ********************************************************************* */ + +static double +uacorrelation(int n, double** data1, double** data2, int** mask1, int** mask2, + const double weight[], int index1, int index2, int transpose) +/* +Purpose +======= + +The uacorrelation routine calculates the weighted Pearson distance between two +rows or columns, using the absolute value of the uncentered version of the +Pearson correlation. In the uncentered Pearson correlation, a zero mean is used +for both vectors even if the actual mean is nonzero. +This definition yields a semi-metric: d(a,b) >= 0, and d(a,b) = 0 iff a = b. +but the triangular inequality d(a,b) + d(b,c) >= d(a,c) does not hold +(e.g., choose b = a + c). + +Arguments +========= + +n (input) int +The number of elements in a row or column. If transpose == 0, then n is the +number of columns; otherwise, n is the number of rows. + +data1 (input) double array +The data array containing the first vector. + +data2 (input) double array +The data array containing the second vector. + +mask1 (input) int array +This array which elements in data1 are missing. If mask1[i][j] == 0, then +data1[i][j] is missing. + +mask2 (input) int array +This array which elements in data2 are missing. If mask2[i][j] == 0, then +data2[i][j] is missing. + +weight (input) double[ncolumns] if transpose == 0, + double[nrows] otherwise +The weights that are used to calculate the distance. This is equivalent +to including the jth data point weight[j] times in the calculation. The +weights can be non-integer. + +index1 (input) int +Index of the first row or column. + +index2 (input) int +Index of the second row or column. + +transpose (input) int +If transpose == 0, the distance between two rows in the matrix is calculated. +Otherwise, the distance between two columns in the matrix is calculated. 
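+
+Because the absolute value of the uncentered correlation is taken, perfectly
+correlated and perfectly anticorrelated vectors both have distance zero.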
+============================================================================ +*/ +{ + double result = 0.; + double denom1 = 0.; + double denom2 = 0.; + int flag = 0; + /* flag will remain zero if no nonzero combinations of mask1 and mask2 are + * found. + */ + + if (transpose == 0) /* Calculate the distance between two rows */ { + int i; + for (i = 0; i < n; i++) { + if (mask1[index1][i] && mask2[index2][i]) { + double term1 = data1[index1][i]; + double term2 = data2[index2][i]; + double w = weight[i]; + result += w*term1*term2; + denom1 += w*term1*term1; + denom2 += w*term2*term2; + flag = 1; + } + } + } + else { + int i; + for (i = 0; i < n; i++) { + if (mask1[i][index1] && mask2[i][index2]) { + double term1 = data1[i][index1]; + double term2 = data2[i][index2]; + double w = weight[i]; + result += w*term1*term2; + denom1 += w*term1*term1; + denom2 += w*term2*term2; + flag = 1; + } + } + } + if (!flag) return 0.; + if (denom1 == 0.) return 1.; + if (denom2 == 0.) return 1.; + result = fabs(result) / sqrt(denom1*denom2); + result = 1. - result; + return result; +} + +/* ********************************************************************* */ + +static double +spearman(int n, double** data1, double** data2, int** mask1, int** mask2, + const double weight[], int index1, int index2, int transpose) +/* +Purpose +======= + +The spearman routine calculates the Spearman distance between two rows or +columns. The Spearman distance is defined as one minus the Spearman rank +correlation. + +Arguments +========= + +n (input) int +The number of elements in a row or column. If transpose == 0, then n is the +number of columns; otherwise, n is the number of rows. + +data1 (input) double array +The data array containing the first vector. + +data2 (input) double array +The data array containing the second vector. + +mask1 (input) int array +This array which elements in data1 are missing. If mask1[i][j] == 0, then +data1[i][j] is missing. + +mask2 (input) int array +This array which elements in data2 are missing. If mask2[i][j] == 0, then +data2[i][j] is missing. + +weight (input) double[ncolumns] if transpose == 0, + double[nrows] otherwise +The weights that are used to calculate the distance. This is equivalent +to including the jth data point weight[j] times in the calculation. The +weights can be non-integer. + +index1 (input) int +Index of the first row or column. + +index2 (input) int +Index of the second row or column. + +transpose (input) int +If transpose == 0, the distance between two rows in the matrix is calculated. +Otherwise, the distance between two columns in the matrix is calculated. 
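+
+Elements flagged as missing in either mask are dropped before ranking, and
+tied values receive the average of the ranks they span (see getrank above).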
+============================================================================ +*/ +{ + int i; + int m = 0; + double* rank1; + double* rank2; + double result = 0.; + double denom1 = 0.; + double denom2 = 0.; + double sum1 = 0.; + double sum2 = 0.; + double totalweight = 0.; + double* tdata1; + double* tdata2; + + tdata1 = malloc(n*sizeof(double)); + if (!tdata1) return 0.0; /* Memory allocation error */ + tdata2 = malloc(n*sizeof(double)); + if (!tdata2) /* Memory allocation error */ { + free(tdata1); + return 0.0; + } + if (transpose == 0) { + for (i = 0; i < n; i++) { + if (mask1[index1][i] && mask2[index2][i]) { + tdata1[m] = data1[index1][i]; + tdata2[m] = data2[index2][i]; + m++; + } + } + } + else { + for (i = 0; i < n; i++) { + if (mask1[i][index1] && mask2[i][index2]) { + tdata1[m] = data1[i][index1]; + tdata2[m] = data2[i][index2]; + m++; + } + } + } + if (m == 0) { + free(tdata1); + free(tdata2); + return 0; + } + rank1 = getrank(m, tdata1, weight); + free(tdata1); + if (!rank1) { + free(tdata2); + return 0.0; /* Memory allocation error */ + } + rank2 = getrank(m, tdata2, weight); + free(tdata2); + if (!rank2) /* Memory allocation error */ { + free(rank1); + return 0.0; + } + for (i = 0; i < m; i++) { + const double term1 = rank1[i]; + const double term2 = rank2[i]; + const double w = weight[i]; + sum1 += term1 * w; + sum2 += term2 * w; + result += term1 * term2 * w; + denom1 += term1 * term1 * w; + denom2 += term2 * term2 * w; + totalweight += w; + } + /* Note: denom1 and denom2 cannot be calculated directly from the number + * of elements. If two elements have the same rank, the squared sum of + * their ranks will change. + */ + free(rank1); + free(rank2); + if (!totalweight) return 0; /* usually due to empty clusters */ + result -= sum1 * sum2 / totalweight; + denom1 -= sum1 * sum1 / totalweight; + denom2 -= sum2 * sum2 / totalweight; + if (denom1 <= 0) return 1; /* include '<' to deal with roundoff errors */ + if (denom2 <= 0) return 1; /* include '<' to deal with roundoff errors */ + result = result / sqrt(denom1*denom2); + result = 1. - result; + return result; +} + +/* ********************************************************************* */ + +static double +kendall(int n, double** data1, double** data2, int** mask1, int** mask2, + const double weight[], int index1, int index2, int transpose) +/* +Purpose +======= + +The kendall routine calculates the Kendall distance between two +rows or columns. The Kendall distance is defined as one minus Kendall's tau. + +Arguments +========= + +n (input) int +The number of elements in a row or column. If transpose == 0, then n is the +number of columns; otherwise, n is the number of rows. + +data1 (input) double array +The data array containing the first vector. + +data2 (input) double array +The data array containing the second vector. + +mask1 (input) int array +This array which elements in data1 are missing. If mask1[i][j] == 0, then +data1[i][j] is missing. + +mask2 (input) int array +This array which elements in data2 are missing. If mask2[i][j] == 0, then +data2[i][j] is missing. + +weight (input) double[ncolumns] if transpose == 0, + double[nrows] otherwise +The weights that are used to calculate the distance. This is equivalent +to including the jth data point weight[j] times in the calculation. The +weights can be non-integer. + +index1 (input) int +Index of the first row or column. + +index2 (input) int +Index of the second row or column. 
+ +transpose (input) int +If transpose == 0, the distance between two rows in the matrix is calculated. +Otherwise, the distance between two columns in the matrix is calculated. +============================================================================ +*/ +{ + double con = 0; + double dis = 0; + double exx = 0; + double exy = 0; + int flag = 0; + /* flag will remain zero if no nonzero combinations of mask1 and mask2 are + * found. + */ + double denomx; + double denomy; + double tau; + int i, j; + + if (transpose == 0) { + for (i = 0; i < n; i++) { + if (mask1[index1][i] && mask2[index2][i]) { + for (j = 0; j < i; j++) { + if (mask1[index1][j] && mask2[index2][j]) { + const double x1 = data1[index1][i]; + const double x2 = data1[index1][j]; + const double y1 = data2[index2][i]; + const double y2 = data2[index2][j]; + const double w = weight[i] * weight[j]; + if (x1 < x2 && y1 < y2) con += w; + else if (x1 > x2 && y1 > y2) con += w; + else if (x1 < x2 && y1 > y2) dis += w; + else if (x1 > x2 && y1 < y2) dis += w; + else if (x1 == x2 && y1 != y2) exx += w; + else if (x1 != x2 && y1 == y2) exy += w; + flag = 1; + } + } + } + } + } + else { + for (i = 0; i < n; i++) { + if (mask1[i][index1] && mask2[i][index2]) { + for (j = 0; j < i; j++) { + if (mask1[j][index1] && mask2[j][index2]) { + const double x1 = data1[i][index1]; + const double x2 = data1[j][index1]; + const double y1 = data2[i][index2]; + const double y2 = data2[j][index2]; + const double w = weight[i] * weight[j]; + if (x1 < x2 && y1 < y2) con += w; + else if (x1 > x2 && y1 > y2) con += w; + else if (x1 < x2 && y1 > y2) dis += w; + else if (x1 > x2 && y1 < y2) dis += w; + else if (x1 == x2 && y1 != y2) exx += w; + else if (x1 != x2 && y1 == y2) exy += w; + flag = 1; + } + } + } + } + } + if (!flag) return 0.; + denomx = con + dis + exx; + denomy = con + dis + exy; + if (denomx == 0) return 1; + if (denomy == 0) return 1; + tau = (con-dis)/sqrt(denomx*denomy); + return 1.-tau; +} + +/* ********************************************************************* */ + +static double(*setmetric(char dist)) + (int, double**, double**, int**, int**, const double[], int, int, int) +{ + switch(dist) { + case 'e': return &euclid; + case 'b': return &cityblock; + case 'c': return &correlation; + case 'a': return &acorrelation; + case 'u': return &ucorrelation; + case 'x': return &uacorrelation; + case 's': return &spearman; + case 'k': return &kendall; + default: return &euclid; + } +} + +/* ********************************************************************* */ + +static double +uniform(void) +/* +Purpose +======= + +This routine returns a uniform random number between 0.0 and 1.0. Both 0.0 +and 1.0 are excluded. This random number generator is described in: + +Pierre l'Ecuyer +Efficient and Portable Combined Random Number Generators +Communications of the ACM, Volume 31, Number 6, June 1988, pages 742-749, 774. + +The first time this routine is called, it initializes the random number +generator using the current time. First, the current epoch time in seconds is +used as a seed for the random number generator in the C library. The first two +random numbers generated by this generator are used to initialize the random +number generator implemented in this routine. + + +Arguments +========= + +None. + + +Return value +============ + +A double-precison number between 0.0 and 1.0. 
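+
+The combined generator has a period of roughly 2.3 x 10^18; the rejection
+loop guarantees that the returned value is strictly between 0.0 and 1.0.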
+============================================================================ +*/ +{ + int z; + static const int m1 = 2147483563; + static const int m2 = 2147483399; + const double scale = 1.0/m1; + + static int s1 = 0; + static int s2 = 0; + + if (s1 == 0 || s2 == 0) { + /* initialize */ + unsigned int initseed = (unsigned int) time(0); + srand(initseed); + s1 = rand(); + s2 = rand(); + } + + do { + int k = s1/53668; + s1 = 40014*(s1-k*53668)-k*12211; + if (s1 < 0) s1+=m1; + k = s2/52774; + s2 = 40692*(s2-k*52774)-k*3791; + if (s2 < 0) s2 += m2; + z = s1-s2; + if (z < 1) z += (m1-1); + } while (z == m1); /* To avoid returning 1.0 */ + + return z*scale; +} + +/* ************************************************************************ */ + +static int +binomial(int n, double p) +/* +Purpose +======= + +This routine generates a random number between 0 and n inclusive, following +the binomial distribution with probability p and n trials. The routine is +based on the BTPE algorithm, described in: + +Voratas Kachitvichyanukul and Bruce W. Schmeiser: +Binomial Random Variate Generation +Communications of the ACM, Volume 31, Number 2, February 1988, pages 216-222. + + +Arguments +========= + +p (input) double +The probability of a single event. This probability should be less than or +equal to 0.5. + +n (input) int +The number of trials. + + +Return value +============ + +An integer drawn from a binomial distribution with parameters (p, n). + +============================================================================ +*/ +{ + const double q = 1 - p; + + if (n*p < 30.0) /* Algorithm BINV */ { + const double s = p/q; + const double a = (n+1)*s; + double r = exp(n*log(q)); /* pow() causes a crash on AIX */ + int x = 0; + double u = uniform(); + while (1) { + if (u < r) return x; + u -= r; + x++; + r *= (a/x)-s; + } + } + else /* Algorithm BTPE */ { + /* Step 0 */ + const double fm = n*p + p; + const int m = (int) fm; + const double p1 = floor(2.195*sqrt(n*p*q) -4.6*q) + 0.5; + const double xm = m + 0.5; + const double xl = xm - p1; + const double xr = xm + p1; + const double c = 0.134 + 20.5/(15.3+m); + const double a = (fm-xl)/(fm-xl*p); + const double b = (xr-fm)/(xr*q); + const double lambdal = a*(1.0+0.5*a); + const double lambdar = b*(1.0+0.5*b); + const double p2 = p1*(1+2*c); + const double p3 = p2 + c/lambdal; + const double p4 = p3 + c/lambdar; + while (1) { + /* Step 1 */ + int y; + int k; + double u = uniform(); + double v = uniform(); + u *= p4; + if (u <= p1) return (int)(xm-p1*v+u); + /* Step 2 */ + if (u > p2) { + /* Step 3 */ + if (u > p3) { + /* Step 4 */ + y = (int)(xr-log(v)/lambdar); + if (y > n) continue; + /* Go to step 5 */ + v = v*(u-p3)*lambdar; + } + else { + y = (int)(xl+log(v)/lambdal); + if (y < 0) continue; + /* Go to step 5 */ + v = v*(u-p2)*lambdal; + } + } + else { + const double x = xl + (u-p1)/c; + v = v*c + 1.0 - fabs(m-x+0.5)/p1; + if (v > 1) continue; + /* Go to step 5 */ + y = (int)x; + } + /* Step 5 */ + /* Step 5.0 */ + k = abs(y-m); + if (k > 20 && k < 0.5*n*p*q-1.0) { + /* Step 5.2 */ + double rho = (k/(n*p*q))*((k*(k/3.0 + 0.625) + + 0.1666666666666)/(n*p*q)+0.5); + double t = -k*k/(2*n*p*q); + double A = log(v); + if (A < t-rho) return y; + else if (A > t+rho) continue; + else { + /* Step 5.3 */ + double x1 = y+1; + double f1 = m+1; + double z = n+1-m; + double w = n-y+1; + double x2 = x1*x1; + double f2 = f1*f1; + double z2 = z*z; + double w2 = w*w; + if (A > xm * log(f1/x1) + (n-m+0.5)*log(z/w) + + (y-m)*log(w*p/(x1*q)) + + 
(13860.-(462.-(132.-(99.-140./f2)/f2)/f2)/f2)/f1/166320. + + (13860.-(462.-(132.-(99.-140./z2)/z2)/z2)/z2)/z/166320. + + (13860.-(462.-(132.-(99.-140./x2)/x2)/x2)/x2)/x1/166320. + + (13860.-(462.-(132.-(99.-140./w2)/w2)/w2)/w2)/w/166320.) + continue; + return y; + } + } + else { + /* Step 5.1 */ + int i; + const double s = p/q; + const double aa = s*(n+1); + double f = 1.0; + for (i = m; i < y; f *= (aa/(++i)-s)); + for (i = y; i < m; f /= (aa/(++i)-s)); + if (v > f) continue; + return y; + } + } + } + return -1; +} + +/* ************************************************************************ */ + +static void +randomassign(int nclusters, int nelements, int clusterid[]) +/* +Purpose +======= + +The randomassign routine performs an initial random clustering, needed for +k-means or k-median clustering. Elements (genes or samples) are randomly +assigned to clusters. The number of elements in each cluster is chosen +randomly, making sure that each cluster will receive at least one element. + + +Arguments +========= + +nclusters (input) int +The number of clusters. + +nelements (input) int +The number of elements to be clustered (i.e., the number of genes or samples +to be clustered). + +clusterid (output) int[nelements] +The cluster number to which an element was assigned. + +============================================================================ +*/ +{ + int i, j; + int k = 0; + double p; + int n = nelements-nclusters; + + /* Draw the number of elements in each cluster from a multinomial + * distribution, reserving ncluster elements to set independently + * in order to guarantee that none of the clusters are empty. + */ + for (i = 0; i < nclusters-1; i++) { + p = 1.0/(nclusters-i); + j = binomial(n, p); + n -= j; + j += k+1; /* Assign at least one element to cluster i */ + for ( ; k < j; k++) clusterid[k] = i; + } + /* Assign the remaining elements to the last cluster */ + for ( ; k < nelements; k++) clusterid[k] = i; + + /* Create a random permutation of the cluster assignments */ + for (i = 0; i < nelements; i++) { + j = (int) (i + (nelements-i)*uniform()); + k = clusterid[j]; + clusterid[j] = clusterid[i]; + clusterid[i] = k; + } +} + +/* ********************************************************************* */ + +static void +getclustermeans(int nclusters, int nrows, int ncolumns, + double** data, int** mask, int clusterid[], double** cdata, int** cmask, + int transpose) +/* +Purpose +======= + +The getclustermeans routine calculates the cluster centroids, given to which +cluster each element belongs. The centroid is defined as the mean over all +elements for each dimension. + +Arguments +========= + +nclusters (input) int +The number of clusters. + +nrows (input) int +The number of rows in the gene expression data matrix, equal to the number of +genes. + +ncolumns (input) int +The number of columns in the gene expression data matrix, equal to the number +of samples. + +data (input) double[nrows][ncolumns] +The array containing the gene expression data. + +mask (input) int[nrows][ncolumns] +This array shows which data values are missing. If mask[i][j] == 0, then +data[i][j] is missing. + +clusterid (output) int[nrows] if transpose == 0 + int[ncolumns] otherwise +The cluster number to which each element belongs. If transpose == 0, then the +dimension of clusterid is equal to nrows (the number of genes). Otherwise, it +is equal to ncolumns (the number of samples). 
+ +cdata (output) double[nclusters][ncolumns] if transpose == 0 + double[nrows][nclusters] otherwise +On exit of getclustermeans, this array contains the cluster centroids. + +cmask (output) int[nclusters][ncolumns] if transpose == 0 + int[nrows][nclusters] otherwise +This array shows which data values of are missing for each centroid. If +cmask[i][j] == 0, then cdata[i][j] is missing. A data value is missing for a +centroid if all corresponding data values of the cluster members are missing. + +transpose (input) int +If transpose == 0, clusters of rows (genes) are specified. Otherwise, clusters +of columns (samples) are specified. + +======================================================================== +*/ +{ + int i, j, k; + + if (transpose == 0) { + for (i = 0; i < nclusters; i++) { + for (j = 0; j < ncolumns; j++) { + cmask[i][j] = 0; + cdata[i][j] = 0.; + } + } + for (k = 0; k < nrows; k++) { + i = clusterid[k]; + for (j = 0; j < ncolumns; j++) { + if (mask[k][j] != 0) { + cdata[i][j] += data[k][j]; + cmask[i][j]++; + } + } + } + for (i = 0; i < nclusters; i++) { + for (j = 0; j < ncolumns; j++) { + if (cmask[i][j]>0) { + cdata[i][j] /= cmask[i][j]; + cmask[i][j] = 1; + } + } + } + } + else { + for (i = 0; i < nrows; i++) { + for (j = 0; j < nclusters; j++) { + cdata[i][j] = 0.; + cmask[i][j] = 0; + } + } + for (k = 0; k < ncolumns; k++) { + i = clusterid[k]; + for (j = 0; j < nrows; j++) { + if (mask[j][k] != 0) { + cdata[j][i] += data[j][k]; + cmask[j][i]++; + } + } + } + for (i = 0; i < nrows; i++) { + for (j = 0; j < nclusters; j++) { + if (cmask[i][j]>0) { + cdata[i][j] /= cmask[i][j]; + cmask[i][j] = 1; + } + } + } + } +} + +/* ********************************************************************* */ + +static void +getclustermedians(int nclusters, int nrows, int ncolumns, + double** data, int** mask, int clusterid[], double** cdata, int** cmask, + int transpose, double cache[]) +/* +Purpose +======= + +The getclustermedians routine calculates the cluster centroids, given to which +cluster each element belongs. The centroid is defined as the median over all +elements for each dimension. + +Arguments +========= + +nclusters (input) int +The number of clusters. + +nrows (input) int +The number of rows in the gene expression data matrix, equal to the number of +genes. + +ncolumns (input) int +The number of columns in the gene expression data matrix, equal to the number +of samples. + +data (input) double[nrows][ncolumns] +The array containing the gene expression data. + +mask (input) int[nrows][ncolumns] +This array shows which data values are missing. If mask[i][j] == 0, then +data[i][j] is missing. + +clusterid (output) int[nrows] if transpose == 0 + int[ncolumns] otherwise +The cluster number to which each element belongs. If transpose == 0, then the +dimension of clusterid is equal to nrows (the number of genes). Otherwise, it +is equal to ncolumns (the number of samples). + +cdata (output) double[nclusters][ncolumns] if transpose == 0 + double[nrows][nclusters] otherwise +On exit of getclustermedians, this array contains the cluster centroids. + +cmask (output) int[nclusters][ncolumns] if transpose == 0 + int[nrows][nclusters] otherwise +This array shows which data values of are missing for each centroid. If +cmask[i][j] == 0, then cdata[i][j] is missing. A data value is missing for +a centroid if all corresponding data values of the cluster members are missing. + +transpose (input) int +If transpose == 0, clusters of rows (genes) are specified. 
Otherwise, clusters +of columns (samples) are specified. + +cache (input) double[nrows] if transpose == 0 + double[ncolumns] otherwise +This array should be allocated before calling getclustermedians; its contents +on input is not relevant. This array is used as a temporary storage space when +calculating the medians. + +======================================================================== +*/ +{ + int i, j, k; + + if (transpose == 0) { + for (i = 0; i < nclusters; i++) { + for (j = 0; j < ncolumns; j++) { + int count = 0; + for (k = 0; k < nrows; k++) { + if (i == clusterid[k] && mask[k][j]) { + cache[count] = data[k][j]; + count++; + } + } + if (count>0) { + cdata[i][j] = median(count, cache); + cmask[i][j] = 1; + } + else { + cdata[i][j] = 0.; + cmask[i][j] = 0; + } + } + } + } + else { + for (i = 0; i < nclusters; i++) { + for (j = 0; j < nrows; j++) { + int count = 0; + for (k = 0; k < ncolumns; k++) { + if (i == clusterid[k] && mask[j][k]) { + cache[count] = data[j][k]; + count++; + } + } + if (count>0) { + cdata[j][i] = median(count, cache); + cmask[j][i] = 1; + } + else { + cdata[j][i] = 0.; + cmask[j][i] = 0; + } + } + } + } +} + +/* ********************************************************************* */ + +int +getclustercentroids(int nclusters, int nrows, int ncolumns, + double** data, int** mask, int clusterid[], double** cdata, int** cmask, + int transpose, char method) +/* +Purpose +======= + +The getclustercentroids routine calculates the cluster centroids, given to +which cluster each element belongs. Depending on the argument method, the +centroid is defined as either the mean or the median for each dimension over +all elements belonging to a cluster. + +Arguments +========= + +nclusters (input) int +The number of clusters. + +nrows (input) int +The number of rows in the gene expression data matrix, equal to the number of +genes. + +ncolumns (input) int +The number of columns in the gene expression data matrix, equal to the number +of samples. + +data (input) double[nrows][ncolumns] +The array containing the gene expression data. + +mask (input) int[nrows][ncolumns] +This array shows which data values are missing. If mask[i][j] == 0, then +data[i][j] is missing. + +clusterid (output) int[nrows] if transpose == 0 + int[ncolumns] otherwise +The cluster number to which each element belongs. If transpose == 0, then the +dimension of clusterid is equal to nrows (the number of genes). Otherwise, it +is equal to ncolumns (the number of samples). + +cdata (output) double[nclusters][ncolumns] if transpose == 0 + double[nrows][nclusters] otherwise +On exit of getclustercentroids, this array contains the cluster centroids. + +cmask (output) int[nclusters][ncolumns] if transpose == 0 + int[nrows][nclusters] otherwise +This array shows which data values of are missing for each centroid. If +cmask[i][j] == 0, then cdata[i][j] is missing. A data value is missing for +a centroid if all corresponding data values of the cluster members are missing. + +transpose (input) int +If transpose == 0, clusters of rows (genes) are specified. Otherwise, clusters +of columns (samples) are specified. + +method (input) char +For method == 'a', the centroid is defined as the mean over all elements +belonging to a cluster for each dimension. +For method == 'm', the centroid is defined as the median over all elements +belonging to a cluster for each dimension. + +Return value +============ + +The function returns an integer to indicate success or failure. 
If a +memory error occurs, or if method is not 'm' or 'a', getclustercentroids +returns 0. If successful, getclustercentroids returns 1. +======================================================================== +*/ +{ + switch(method) { + case 'm': { + const int nelements = (transpose == 0) ? nrows : ncolumns; + double* cache = malloc(nelements*sizeof(double)); + if (!cache) return 0; + getclustermedians(nclusters, nrows, ncolumns, data, mask, + clusterid, cdata, cmask, transpose, cache); + free(cache); + return 1; + } + case 'a': { + getclustermeans(nclusters, nrows, ncolumns, data, mask, + clusterid, cdata, cmask, transpose); + return 1; + } + } + return 0; +} + +/* ********************************************************************* */ + +void +getclustermedoids(int nclusters, int nelements, double** distance, + int clusterid[], int centroids[], double errors[]) +/* +Purpose +======= + +The getclustermedoids routine calculates the cluster centroids, given to which +cluster each element belongs. The centroid is defined as the element with the +smallest sum of distances to the other elements. + +Arguments +========= + +nclusters (input) int +The number of clusters. + +nelements (input) int +The total number of elements. + +distmatrix (input) double array, ragged + (number of rows is nelements, number of columns is equal to the row number) +The distance matrix. To save space, the distance matrix is given in the +form of a ragged array. The distance matrix is symmetric and has zeros +on the diagonal. See distancematrix for a description of the content. + +clusterid (output) int[nelements] +The cluster number to which each element belongs. + +centroid (output) int[nclusters] +The index of the element that functions as the centroid for each cluster. + +errors (output) double[nclusters] +The within-cluster sum of distances between the items and the cluster +centroid. + +======================================================================== +*/ +{ + int i, j, k; + + for (j = 0; j < nclusters; j++) errors[j] = DBL_MAX; + for (i = 0; i < nelements; i++) { + double d = 0.0; + j = clusterid[i]; + for (k = 0; k < nelements; k++) { + if (i == k || clusterid[k]!=j) continue; + d += (i < k ? distance[k][i] : distance[i][k]); + if (d > errors[j]) break; + } + if (d < errors[j]) { + errors[j] = d; + centroids[j] = i; + } + } +} + +/* ********************************************************************* */ + +static int +kmeans(int nclusters, int nrows, int ncolumns, double** data, int** mask, + double weight[], int transpose, int npass, char dist, + double** cdata, int** cmask, int clusterid[], double* error, + int tclusterid[], int counts[], int mapping[]) +{ + int i, j, k; + const int nelements = (transpose == 0) ? nrows : ncolumns; + const int ndata = (transpose == 0) ? ncolumns : nrows; + int ifound = 1; + int ipass = 0; + /* Set the metric function as indicated by dist */ + double (*metric) (int, double**, double**, int**, int**, + const double[], int, int, int) = setmetric(dist); + + /* Save the clustering solution periodically and check if it reappears */ + int* saved = malloc(nelements*sizeof(int)); + if (saved == NULL) return -1; + + *error = DBL_MAX; + + do { + double total = DBL_MAX; + int counter = 0; + int period = 10; + + /* Perform the EM algorithm. + * First, randomly assign elements to clusters. 
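+         * Then iterate: recompute the cluster centroids, reassign each
+         * element to its nearest centroid (skipping moves that would empty a
+         * cluster), and stop when the total within-cluster distance stops
+         * decreasing or a previously saved assignment reappears.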
*/ + if (npass != 0) randomassign(nclusters, nelements, tclusterid); + + for (i = 0; i < nclusters; i++) counts[i] = 0; + for (i = 0; i < nelements; i++) counts[tclusterid[i]]++; + + /* Start the loop */ + while (1) { + double previous = total; + total = 0.0; + + if (counter % period == 0) { + /* Save the current cluster assignments */ + for (i = 0; i < nelements; i++) saved[i] = tclusterid[i]; + if (period < INT_MAX / 2) period *= 2; + } + counter++; + + /* Find the center */ + getclustermeans(nclusters, nrows, ncolumns, data, mask, tclusterid, + cdata, cmask, transpose); + + for (i = 0; i < nelements; i++) { + double distance; + /* Calculate the distances */ + k = tclusterid[i]; + if (counts[k] == 1) continue; + /* No reassignment if that would lead to an empty cluster */ + /* Treat the present cluster as a special case */ + distance = metric(ndata, data, cdata, mask, cmask, weight, + i, k, transpose); + for (j = 0; j < nclusters; j++) { + double tdistance; + if (j == k) continue; + tdistance = metric(ndata, data, cdata, mask, cmask, weight, + i, j, transpose); + if (tdistance < distance) { + distance = tdistance; + counts[tclusterid[i]]--; + tclusterid[i] = j; + counts[j]++; + } + } + total += distance; + } + if (total >= previous) break; + /* total >= previous is FALSE on some machines even if total and + * previous are bitwise identical. */ + for (i = 0; i < nelements; i++) + if (saved[i]!=tclusterid[i]) break; + if (i == nelements) + break; /* Identical solution found; break out of this loop */ + } + + if (npass <= 1) { + *error = total; + break; + } + + for (i = 0; i < nclusters; i++) mapping[i] = -1; + for (i = 0; i < nelements; i++) { + j = tclusterid[i]; + k = clusterid[i]; + if (mapping[k] == -1) mapping[k] = j; + else if (mapping[k] != j) { + if (total < *error) { + ifound = 1; + *error = total; + for (j = 0; j < nelements; j++) + clusterid[j] = tclusterid[j]; + } + break; + } + } + if (i == nelements) ifound++; /* break statement not encountered */ + } while (++ipass < npass); + + free(saved); + return ifound; +} + +/* ---------------------------------------------------------------------- */ + +static int +kmedians(int nclusters, int nrows, int ncolumns, double** data, int** mask, + double weight[], int transpose, int npass, char dist, + double** cdata, int** cmask, int clusterid[], double* error, + int tclusterid[], int counts[], int mapping[], double cache[]) +{ + int i, j, k; + const int nelements = (transpose == 0) ? nrows : ncolumns; + const int ndata = (transpose == 0) ? ncolumns : nrows; + int ifound = 1; + int ipass = 0; + int* saved; + /* Set the metric function as indicated by dist */ + double (*metric) (int, double**, double**, int**, int**, + const double[], int, int, int) = setmetric(dist); + + /* Save the clustering solution periodically and check if it reappears */ + saved = malloc(nelements*sizeof(int)); + if (saved == NULL) return -1; + + *error = DBL_MAX; + + do { + double total = DBL_MAX; + int counter = 0; + int period = 10; + + /* Perform the EM algorithm. + * First, randomly assign elements to clusters. 
*/ + if (npass != 0) randomassign(nclusters, nelements, tclusterid); + + for (i = 0; i < nclusters; i++) counts[i] = 0; + for (i = 0; i < nelements; i++) counts[tclusterid[i]]++; + + /* Start the loop */ + while (1) { + double previous = total; + total = 0.0; + + if (counter % period == 0) { + /* Save the current cluster assignments */ + for (i = 0; i < nelements; i++) saved[i] = tclusterid[i]; + if (period < INT_MAX / 2) period *= 2; + } + counter++; + + /* Find the center */ + getclustermedians(nclusters, nrows, ncolumns, data, mask, + tclusterid, cdata, cmask, transpose, cache); + + for (i = 0; i < nelements; i++) { + /* Calculate the distances */ + double distance; + k = tclusterid[i]; + if (counts[k] == 1) continue; + /* No reassignment if that would lead to an empty cluster */ + /* Treat the present cluster as a special case */ + distance = metric(ndata, data, cdata, mask, cmask, weight, + i, k, transpose); + for (j = 0; j < nclusters; j++) { + double tdistance; + if (j == k) continue; + tdistance = metric(ndata, data, cdata, mask, cmask, weight, + i, j, transpose); + if (tdistance < distance) { + distance = tdistance; + counts[tclusterid[i]]--; + tclusterid[i] = j; + counts[j]++; + } + } + total += distance; + } + if (total >= previous) break; + /* total >= previous is FALSE on some machines even if total and + * previous are bitwise identical. */ + for (i = 0; i < nelements; i++) + if (saved[i]!=tclusterid[i]) break; + if (i == nelements) + break; /* Identical solution found; break out of this loop */ + } + + if (npass <= 1) { + *error = total; + break; + } + + for (i = 0; i < nclusters; i++) mapping[i] = -1; + for (i = 0; i < nelements; i++) { + j = tclusterid[i]; + k = clusterid[i]; + if (mapping[k] == -1) mapping[k] = j; + else if (mapping[k] != j) { + if (total < *error) { + ifound = 1; + *error = total; + for (j = 0; j < nelements; j++) + clusterid[j] = tclusterid[j]; + } + break; + } + } + if (i == nelements) ifound++; /* break statement not encountered */ + } while (++ipass < npass); + + free(saved); + return ifound; +} + +/* ********************************************************************* */ + +void +kcluster(int nclusters, int nrows, int ncolumns, double** data, int** mask, + double weight[], int transpose, int npass, char method, char dist, + int clusterid[], double* error, int* ifound) +/* +Purpose +======= + +The kcluster routine performs k-means or k-median clustering on a given set of +elements, using the specified distance measure. The number of clusters is given +by the user. Multiple passes are being made to find the optimal clustering +solution, each time starting from a different initial clustering. + + +Arguments +========= + +nclusters (input) int +The number of clusters to be found. + +data (input) double[nrows][ncolumns] +The array containing the data of the elements to be clustered (i.e., the gene +expression data). + +mask (input) int[nrows][ncolumns] +This array shows which data values are missing. If +mask[i][j] == 0, then data[i][j] is missing. + +nrows (input) int +The number of rows in the data matrix, equal to the number of genes. + +ncolumns (input) int +The number of columns in the data matrix, equal to the number of samples. + +weight (input) double[ncolumns] if transpose == 0, + double[nrows] otherwise +The weights that are used to calculate the distance. This is equivalent +to including the jth data point weight[j] times in the calculation. The +weights can be non-integer. 
+ +transpose (input) int +If transpose == 0, the rows of the matrix are clustered. Otherwise, columns +of the matrix are clustered. + +npass (input) int +The number of times clustering is performed. Clustering is performed npass +times, each time starting from a different (random) initial assignment of +genes to clusters. The clustering solution with the lowest within-cluster sum +of distances is chosen. +If npass == 0, then the clustering algorithm will be run once, where the +initial assignment of elements to clusters is taken from the clusterid array. + +method (input) char +Defines whether the arithmetic mean (method == 'a') or the median +(method == 'm') is used to calculate the cluster center. + +dist (input) char +Defines which distance measure is used, as given by the table: +dist == 'e': Euclidean distance +dist == 'b': City-block distance +dist == 'c': correlation +dist == 'a': absolute value of the correlation +dist == 'u': uncentered correlation +dist == 'x': absolute uncentered correlation +dist == 's': Spearman's rank correlation +dist == 'k': Kendall's tau +For other values of dist, the default (Euclidean distance) is used. + +clusterid (output; input) int[nrows] if transpose == 0 + int[ncolumns] otherwise +The cluster number to which a gene or microarray was assigned. If npass == 0, +then on input clusterid contains the initial clustering assignment from which +the clustering algorithm starts. On output, it contains the clustering solution +that was found. + +error (output) double* +The sum of distances to the cluster center of each item in the optimal k-means +clustering solution that was found. + +ifound (output) int* +The number of times the optimal clustering solution was +found. The value of ifound is at least 1; its maximum value is npass. If the +number of clusters is larger than the number of elements being clustered, +*ifound is set to 0 as an error code. If a memory allocation error occurs, +*ifound is set to -1. + +======================================================================== +*/ +{ + const int nelements = (transpose == 0) ? nrows : ncolumns; + const int ndata = (transpose == 0) ? ncolumns : nrows; + + int i; + int ok; + int* tclusterid; + int* mapping = NULL; + double** cdata; + int** cmask; + int* counts; + + if (nelements < nclusters) { + *ifound = 0; + return; + } + /* More clusters asked for than elements available */ + + *ifound = -1; + + /* This will contain the number of elements in each cluster, which is + * needed to check for empty clusters. 
*/ + counts = malloc(nclusters*sizeof(int)); + if (!counts) return; + + /* Find out if the user specified an initial clustering */ + if (npass <= 1) tclusterid = clusterid; + else { + tclusterid = malloc(nelements*sizeof(int)); + if (!tclusterid) { + free(counts); + return; + } + mapping = malloc(nclusters*sizeof(int)); + if (!mapping) { + free(counts); + free(tclusterid); + return; + } + for (i = 0; i < nelements; i++) clusterid[i] = 0; + } + + /* Allocate space to store the centroid data */ + if (transpose == 0) ok = makedatamask(nclusters, ndata, &cdata, &cmask); + else ok = makedatamask(ndata, nclusters, &cdata, &cmask); + if (!ok) { + free(counts); + if (npass>1) { + free(tclusterid); + free(mapping); + } + return; + } + + if (method == 'm') { + double* cache = malloc(nelements*sizeof(double)); + if (cache) { + *ifound = kmedians(nclusters, nrows, ncolumns, data, mask, weight, + transpose, npass, dist, cdata, cmask, clusterid, + error, tclusterid, counts, mapping, cache); + free(cache); + } + } + else + *ifound = kmeans(nclusters, nrows, ncolumns, data, mask, weight, + transpose, npass, dist, cdata, cmask, clusterid, + error, tclusterid, counts, mapping); + + /* Deallocate temporarily used space */ + if (npass > 1) { + free(mapping); + free(tclusterid); + } + + if (transpose == 0) freedatamask(nclusters, cdata, cmask); + else freedatamask(ndata, cdata, cmask); + + free(counts); +} + +/* *********************************************************************** */ + +void +kmedoids(int nclusters, int nelements, double** distmatrix, int npass, + int clusterid[], double* error, int* ifound) +/* +Purpose +======= + +The kmedoids routine performs k-medoids clustering on a given set of elements, +using the distance matrix and the number of clusters passed by the user. +Multiple passes are being made to find the optimal clustering solution, each +time starting from a different initial clustering. + + +Arguments +========= + +nclusters (input) int +The number of clusters to be found. + +nelements (input) int +The number of elements to be clustered. + +distmatrix (input) double array, ragged + (number of rows is nelements, number of columns is equal to the row number) +The distance matrix. To save space, the distance matrix is given in the +form of a ragged array. The distance matrix is symmetric and has zeros +on the diagonal. See distancematrix for a description of the content. + +npass (input) int +The number of times clustering is performed. Clustering is performed npass +times, each time starting from a different (random) initial assignment of genes +to clusters. The clustering solution with the lowest within-cluster sum of +distances is chosen. +If npass == 0, then the clustering algorithm will be run once, where the +initial assignment of elements to clusters is taken from the clusterid array. + +clusterid (output; input) int[nelements] +On input, if npass == 0, then clusterid contains the initial clustering +assignment from which the clustering algorithm starts; all numbers in clusterid +should be between zero and nelements-1 inclusive. If npass != 0, clusterid is +ignored on input. +On output, clusterid contains the clustering solution that was found: clusterid +contains the number of the cluster to which each item was assigned. On output, +the number of a cluster is defined as the item number of the centroid of the +cluster. + +error (output) double +The sum of distances to the cluster center of each item in the optimal +k-medoids clustering solution that was found. 
+ +ifound (output) int +If kmedoids is successful: the number of times the optimal clustering solution +was found. The value of ifound is at least 1; its maximum value is npass. +If the user requested more clusters than elements available, ifound is set +to 0. If kmedoids fails due to a memory allocation error, ifound is set to -1. + +======================================================================== +*/ +{ + int i, j, icluster; + int* tclusterid; + int* saved; + int* centroids; + double* errors; + int ipass = 0; + + if (nelements < nclusters) { + *ifound = 0; + return; + } /* More clusters asked for than elements available */ + + *ifound = -1; + + /* Save the clustering solution periodically and check if it reappears */ + saved = malloc(nelements*sizeof(int)); + if (saved == NULL) return; + + centroids = malloc(nclusters*sizeof(int)); + if (!centroids) { + free(saved); + return; + } + + errors = malloc(nclusters*sizeof(double)); + if (!errors) { + free(saved); + free(centroids); + return; + } + + /* Find out if the user specified an initial clustering */ + if (npass <= 1) tclusterid = clusterid; + else { + tclusterid = malloc(nelements*sizeof(int)); + if (!tclusterid) { + free(saved); + free(centroids); + free(errors); + return; + } + for (i = 0; i < nelements; i++) clusterid[i] = -1; + } + + *error = DBL_MAX; + do /* Start the loop */ { + double total = DBL_MAX; + int counter = 0; + int period = 10; + + if (npass != 0) randomassign(nclusters, nelements, tclusterid); + while (1) { + double previous = total; + total = 0.0; + + if (counter % period == 0) { + /* Save the current cluster assignments */ + for (i = 0; i < nelements; i++) saved[i] = tclusterid[i]; + if (period < INT_MAX / 2) period *= 2; + } + counter++; + + /* Find the center */ + getclustermedoids(nclusters, nelements, distmatrix, tclusterid, + centroids, errors); + + for (i = 0; i < nelements; i++) { + /* Find the closest cluster */ + double distance = DBL_MAX; + for (icluster = 0; icluster < nclusters; icluster++) { + double tdistance; + j = centroids[icluster]; + if (i == j) { + distance = 0.0; + tclusterid[i] = icluster; + break; + } + tdistance = (i > j) ? distmatrix[i][j] : distmatrix[j][i]; + if (tdistance < distance) { + distance = tdistance; + tclusterid[i] = icluster; + } + } + total += distance; + } + if (total >= previous) break; + /* total >= previous is FALSE on some machines even if total and + * previous are bitwise identical. */ + for (i = 0; i < nelements; i++) + if (saved[i] != tclusterid[i]) break; + if (i == nelements) + break; /* Identical solution found; break out of this loop */ + } + + if (npass <= 1) { + *ifound = 1; + *error = total; + /* Replace by the centroid in each cluster. */ + for (j = 0; j < nelements; j++) { + clusterid[j] = centroids[tclusterid[j]]; + } + break; + } + + for (i = 0; i < nelements; i++) { + if (clusterid[i]!=centroids[tclusterid[i]]) { + if (total < *error) { + *ifound = 1; + *error = total; + /* Replace by the centroid in each cluster. 
*/
+                    for (j = 0; j < nelements; j++) {
+                        clusterid[j] = centroids[tclusterid[j]];
+                    }
+                }
+                break;
+            }
+        }
+        if (i == nelements) (*ifound)++; /* break statement not encountered */
+    } while (++ipass < npass);
+
+    /* Deallocate temporarily used space */
+    if (npass > 1) free(tclusterid);
+
+    free(saved);
+    free(centroids);
+    free(errors);
+}
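+
+/* Usage sketch (editorial example, not part of the original library): one way
+ * to call kcluster for k-means (method 'a', Euclidean distance 'e') on a tiny
+ * 4x2 matrix with two obvious groups. The data, weights, and parameter
+ * choices are made up for illustration.
+ */
+#if 0
+static void example_kcluster(void)
+{
+    double r0[] = {1.0, 1.0}, r1[] = {1.2, 0.8},
+           r2[] = {8.0, 8.2}, r3[] = {7.9, 8.1};
+    double* data[4] = {r0, r1, r2, r3};
+    int m0[] = {1, 1}, m1[] = {1, 1}, m2[] = {1, 1}, m3[] = {1, 1};
+    int* mask[4] = {m0, m1, m2, m3};
+    double weight[2] = {1.0, 1.0};
+    int clusterid[4];
+    double error;
+    int ifound;
+
+    kcluster(2, 4, 2, data, mask, weight, 0, 10, 'a', 'e',
+             clusterid, &error, &ifound);
+    /* ifound == -1 signals a memory error; ifound == 0 means more clusters
+     * were requested than elements; otherwise clusterid[] holds the best
+     * solution found over the 10 passes and error its sum of distances. */
+}
+#endif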
+
+/* ******************************************************************** */
+
+void
+distancematrix(int nrows, int ncolumns, double** data, int** mask,
+    double weights[], char dist, int transpose, double** matrix)
+/*
+Purpose
+=======
+
+The distancematrix routine calculates the distance matrix between genes or
+samples using their measured gene expression data. Several distance measures
+can be used. As the distance matrix is symmetric, with zeros on the diagonal,
+only the lower triangular half of the distance matrix is stored.
+Space for the distance matrix should be allocated before calling this routine.
+If the parameter transpose is set to a nonzero value, the distances between
+columns of the data matrix are calculated, otherwise distances between the rows
+are calculated.
+
+
+Arguments
+=========
+
+nrows (input) int
+The number of rows in the gene expression data matrix (i.e., the number of
+genes)
+
+ncolumns (input) int
+The number of columns in the gene expression data matrix (i.e., the number of
+samples)
+
+data (input) double[nrows][ncolumns]
+The array containing the gene expression data.
+
+mask (input) int[nrows][ncolumns]
+This array shows which data values are missing. If mask[i][j] == 0, then
+data[i][j] is missing.
+
+weights (input) double[ncolumns] if transpose == 0,
+ double[nrows] otherwise
+The weights that are used to calculate the distance. This is equivalent
+to including the jth data point weight[j] times in the calculation. The
+weights can be non-integer.
+
+dist (input) char
+Defines which distance measure is used, as given by the table:
+dist == 'e': Euclidean distance
+dist == 'b': City-block distance
+dist == 'c': correlation
+dist == 'a': absolute value of the correlation
+dist == 'u': uncentered correlation
+dist == 'x': absolute uncentered correlation
+dist == 's': Spearman's rank correlation
+dist == 'k': Kendall's tau
+For other values of dist, the default (Euclidean distance) is used.
+
+transpose (input) int
+If transpose is equal to zero, the distances between the rows are
+calculated. Otherwise, the distances between the columns are calculated.
+The former is needed when genes are being clustered; the latter is used
+when samples are being clustered.
+
+matrix (output) double**
+A ragged array, with the number of columns in each row equal to the
+row index (so matrix[i] has i columns). Upon return, the values of
+the distance matrix are stored in this array.
+
+
+========================================================================
+*/
+{
+    /* First determine the size of the distance matrix */
+    const int n = (transpose == 0) ? nrows : ncolumns;
+    const int ndata = (transpose == 0) ? ncolumns : nrows;
+    int i, j;
+
+    /* Set the metric function as indicated by dist */
+    double (*metric) (int, double**, double**, int**, int**,
+        const double[], int, int, int) = setmetric(dist);
+
+    /* Calculate the distances and save them in the ragged array */
+    for (i = 1; i < n; i++)
+        for (j = 0; j < i; j++)
+            matrix[i][j] = metric(ndata, data, data, mask, mask, weights,
+                                  i, j, transpose);
+}
+
+/* ******************************************************************** */
+
+double*
+calculate_weights(int nrows, int ncolumns, double** data, int** mask,
+    double weights[], int transpose, char dist, double cutoff, double exponent)
+
+/*
+Purpose
+=======
+
+This function calculates the weights using the weighting scheme proposed by
+Michael Eisen:
+w[i] = 1.0 / sum_{j where d[i][j] < cutoff} (1 - d[i][j]/cutoff)**exponent
+where cutoff and exponent are specified by the user.
+
+[The remainder of this comment, the body of calculate_weights, and the
+opening of the cuttree routine are missing from this copy of the source;
+the code below is the tail of cuttree's iterative tree traversal, which
+assigns a cluster number to each element.]
+*/
+        if (i >= 0) {
+            clusterid[i] = k;
+            j = i;
+            i = previous;
+            previous = j;
+        }
+        else {
+            j = -i-1;
+            if (previous == tree[j].left) {
+                previous = i;
+                i = tree[j].right;
+                if (j >= n && (i >= 0 || -i-1 < n)) k++;
+            }
+            else if (previous == tree[j].right) {
+                previous = i;
+                i = parents[j];
+                if (i == nelements) break;
+            }
+            else {
+                parents[j] = previous;
+                previous = i;
+                i = tree[j].left;
+                if (j >= n && (i >= 0 || -i-1 < n)) k++;
+            }
+        }
+    }
+    free(parents);
+    return 1;
+}
+
+/* ******************************************************************** */
+
+static Node*
+pclcluster(int nrows, int ncolumns, double** data, int** mask, double weight[],
+    double** distmatrix, char dist, int transpose)
+
+/*
+
+Purpose
+=======
+
+The pclcluster routine performs clustering using pairwise centroid-linking on a
+given set of gene expression data, using the distance metric given by dist.
+
+Arguments
+=========
+
+nrows (input) int
+The number of rows in the gene expression data matrix, equal to the number of
+genes.
+
+ncolumns (input) int
+The number of columns in the gene expression data matrix, equal to the number
+of samples.
+
+data (input) double[nrows][ncolumns]
+The array containing the gene expression data.
+
+mask (input) int[nrows][ncolumns]
+This array shows which data values are missing. If
+mask[i][j] == 0, then data[i][j] is missing.
+
+weight (input) double[ncolumns] if transpose == 0;
+ double[nrows] otherwise
+The weights that are used to calculate the distance. This is equivalent
+to including the jth data point weight[j] times in the calculation. The
+weights can be non-integer.
+
+transpose (input) int
+If transpose == 0, the rows of the matrix are clustered. Otherwise, columns
+of the matrix are clustered.
+
+dist (input) char
+Defines which distance measure is used, as given by the table:
+dist == 'e': Euclidean distance
+dist == 'b': City-block distance
+dist == 'c': correlation
+dist == 'a': absolute value of the correlation
+dist == 'u': uncentered correlation
+dist == 'x': absolute uncentered correlation
+dist == 's': Spearman's rank correlation
+dist == 'k': Kendall's tau
+For other values of dist, the default (Euclidean distance) is used.
+
+distmatrix (input) double**
+The distance matrix. This matrix is precalculated by the calling routine
+treecluster. The pclcluster routine modifies the contents of distmatrix, but
+does not deallocate it.
+
+Return value
+============
+
+A pointer to a newly allocated array of Node structs, describing the
+hierarchical clustering solution consisting of nelements-1 nodes. Depending
+on whether genes (rows) or samples (columns) were clustered, nelements is
+equal to nrows or ncolumns. See src/cluster.h for a description of the Node
+structure.
+If a memory error occurs, pclcluster returns NULL. +======================================================================== +*/ +{ + int i, j; + const int nelements = (transpose == 0) ? nrows : ncolumns; + int inode; + const int ndata = transpose ? nrows : ncolumns; + const int nnodes = nelements - 1; + Node* result; + double** newdata; + int** newmask; + int* distid; + + /* Set the metric function as indicated by dist */ + double (*metric) (int, double**, double**, int**, int**, + const double[], int, int, int) = setmetric(dist); + + distid = malloc(nelements*sizeof(int)); + if (!distid) return NULL; + result = malloc(nnodes*sizeof(Node)); + if (!result) { + free(distid); + return NULL; + } + if (!makedatamask(nelements, ndata, &newdata, &newmask)) { + free(result); + free(distid); + return NULL; + } + + for (i = 0; i < nelements; i++) distid[i] = i; + /* To remember which row/column in the distance matrix contains what */ + + /* Storage for node data */ + if (transpose) { + for (i = 0; i < nelements; i++) { + for (j = 0; j < ndata; j++) { + newdata[i][j] = data[j][i]; + newmask[i][j] = mask[j][i]; + } + } + data = newdata; + mask = newmask; + } + else { + for (i = 0; i < nelements; i++) { + memcpy(newdata[i], data[i], ndata*sizeof(double)); + memcpy(newmask[i], mask[i], ndata*sizeof(int)); + } + data = newdata; + mask = newmask; + } + + for (inode = 0; inode < nnodes; inode++) { + /* Find the pair with the shortest distance */ + int is = 1; + int js = 0; + result[inode].distance = find_closest_pair(nelements-inode, distmatrix, + &is, &js); + result[inode].left = distid[js]; + result[inode].right = distid[is]; + + /* Make node js the new node */ + for (i = 0; i < ndata; i++) { + data[js][i] = data[js][i]*mask[js][i] + data[is][i]*mask[is][i]; + mask[js][i] += mask[is][i]; + if (mask[js][i]) data[js][i] /= mask[js][i]; + } + free(data[is]); + free(mask[is]); + data[is] = data[nnodes-inode]; + mask[is] = mask[nnodes-inode]; + + /* Fix the distances */ + distid[is] = distid[nnodes-inode]; + for (i = 0; i < is; i++) + distmatrix[is][i] = distmatrix[nnodes-inode][i]; + for (i = is + 1; i < nnodes-inode; i++) + distmatrix[i][is] = distmatrix[nnodes-inode][i]; + + distid[js] = -inode-1; + for (i = 0; i < js; i++) + distmatrix[js][i] = metric(ndata, data, data, mask, mask, weight, + js, i, 0); + for (i = js + 1; i < nnodes-inode; i++) + distmatrix[i][js] = metric(ndata, data, data, mask, mask, weight, + js, i, 0); + } + + /* Free temporarily allocated space */ + free(data[0]); + free(mask[0]); + free(data); + free(mask); + free(distid); + + return result; +} + +/* ******************************************************************** */ + +static int +nodecompare(const void* a, const void* b) +/* Helper function for qsort. */ +{ + const Node* node1 = (const Node*)a; + const Node* node2 = (const Node*)b; + const double term1 = node1->distance; + const double term2 = node2->distance; + + if (term1 < term2) return -1; + if (term1 > term2) return +1; + return 0; +} + +/* ---------------------------------------------------------------------- */ + +static Node* +pslcluster(int nrows, int ncolumns, double** data, int** mask, + double weight[], double** distmatrix, char dist, int transpose) + +/* + +Purpose +======= + +The pslcluster routine performs single-linkage hierarchical clustering, using +either the distance matrix directly, if available, or by calculating the +distances from the data array. This implementation is based on the SLINK +algorithm, described in: +Sibson, R. (1973). 
SLINK: An optimally efficient algorithm for the single-link +cluster method. The Computer Journal, 16(1): 30-34. +The output of this algorithm is identical to conventional single-linkage +hierarchical clustering, but is much more memory-efficient and faster. Hence, +it can be applied to large data sets, for which the conventional single- +linkage algorithm fails due to lack of memory. + + +Arguments +========= + +nrows (input) int +The number of rows in the gene expression data matrix, equal to the number of +genes. + +ncolumns (input) int +The number of columns in the gene expression data matrix, equal to the number +of samples. + +data (input) double[nrows][ncolumns] +The array containing the gene expression data. + +mask (input) int[nrows][ncolumns] +This array shows which data values are missing. If +mask[i][j] == 0, then data[i][j] is missing. + +weight (input) double[ncolumns] if transpose == 0, + double[nrows] otherwise +The weights that are used to calculate the distance. This is equivalent +to including the jth data point weight[j] times in the calculation. The +weights can be non-integer. + +transpose (input) int +If transpose == 0, the rows of the matrix are clustered. Otherwise, columns +of the matrix are clustered. + +dist (input) char +Defines which distance measure is used, as given by the table: +dist == 'e': Euclidean distance +dist == 'b': City-block distance +dist == 'c': correlation +dist == 'a': absolute value of the correlation +dist == 'u': uncentered correlation +dist == 'x': absolute uncentered correlation +dist == 's': Spearman's rank correlation +dist == 'k': Kendall's tau +For other values of dist, the default (Euclidean distance) is used. + +distmatrix (input) double** +The distance matrix. If the distance matrix is passed by the calling routine +treecluster, it is used by pslcluster to speed up the clustering calculation. +The pslcluster routine does not modify the contents of distmatrix, and does +not deallocate it. If distmatrix is NULL, the pairwise distances are calculated +by the pslcluster routine from the gene expression data (the data and mask +arrays) and stored in temporary arrays. If distmatrix is passed, the original +gene expression data (specified by the data and mask arguments) are not needed +and are therefore ignored. + + +Return value +============ + +A pointer to a newly allocated array of Node structs, describing the +hierarchical clustering solution consisting of nelements-1 nodes. Depending +on whether genes (rows) or samples (columns) were clustered, nelements is +equal to nrows or ncolumns. See src/cluster.h for a description of the Node +structure. +If a memory error occurs, pslcluster returns NULL. + +======================================================================== +*/ +{ + int i, j, k; + const int nelements = transpose ? 
ncolumns : nrows; + const int nnodes = nelements - 1; + int* vector; + double* temp; + int* index; + Node* result; + + temp = malloc(nnodes*sizeof(double)); + if (!temp) return NULL; + index = malloc(nelements*sizeof(int)); + if (!index) { + free(temp); + return NULL; + } + vector = malloc(nnodes*sizeof(int)); + if (!vector) { + free(index); + free(temp); + return NULL; + } + result = malloc(nelements*sizeof(Node)); + if (!result) { + free(vector); + free(index); + free(temp); + return NULL; + } + + for (i = 0; i < nnodes; i++) vector[i] = i; + + if (distmatrix) { + for (i = 0; i < nrows; i++) { + result[i].distance = DBL_MAX; + for (j = 0; j < i; j++) temp[j] = distmatrix[i][j]; + for (j = 0; j < i; j++) { + k = vector[j]; + if (result[j].distance >= temp[j]) { + if (result[j].distance < temp[k]) + temp[k] = result[j].distance; + result[j].distance = temp[j]; + vector[j] = i; + } + else if (temp[j] < temp[k]) temp[k] = temp[j]; + } + for (j = 0; j < i; j++) { + if (result[j].distance >= result[vector[j]].distance) + vector[j] = i; + } + } + } + else { + const int ndata = transpose ? nrows : ncolumns; + /* Set the metric function as indicated by dist */ + double (*metric) (int, double**, double**, int**, int**, + const double[], int, int, int) = setmetric(dist); + + for (i = 0; i < nelements; i++) { + result[i].distance = DBL_MAX; + for (j = 0; j < i; j++) temp[j] = + metric(ndata, data, data, mask, mask, weight, i, j, transpose); + for (j = 0; j < i; j++) { + k = vector[j]; + if (result[j].distance >= temp[j]) { + if (result[j].distance < temp[k]) + temp[k] = result[j].distance; + result[j].distance = temp[j]; + vector[j] = i; + } + else if (temp[j] < temp[k]) temp[k] = temp[j]; + } + for (j = 0; j < i; j++) + if (result[j].distance >= result[vector[j]].distance) + vector[j] = i; + } + } + free(temp); + + for (i = 0; i < nnodes; i++) result[i].left = i; + qsort(result, nnodes, sizeof(Node), nodecompare); + + for (i = 0; i < nelements; i++) index[i] = i; + for (i = 0; i < nnodes; i++) { + j = result[i].left; + k = vector[j]; + result[i].left = index[j]; + result[i].right = index[k]; + index[k] = -i-1; + } + free(vector); + free(index); + + result = realloc(result, nnodes*sizeof(Node)); + + return result; +} +/* ******************************************************************** */ + +static Node* +pmlcluster(int nelements, double** distmatrix) +/* + +Purpose +======= + +The pmlcluster routine performs clustering using pairwise maximum- (complete-) +linking on the given distance matrix. + +Arguments +========= + +nelements (input) int +The number of elements to be clustered. + +distmatrix (input) double** +The distance matrix, with nelements rows, each row being filled up to the +diagonal. The elements on the diagonal are not used, as they are assumed to be +zero. The distance matrix will be modified by this routine. + +Return value +============ + +A pointer to a newly allocated array of Node structs, describing the +hierarchical clustering solution consisting of nelements-1 nodes. Depending on +whether genes (rows) or samples (columns) were clustered, nelements is equal +to nrows or ncolumns. See src/cluster.h for a description of the Node +structure. +If a memory error occurs, pmlcluster returns NULL. 
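+
+Worked example (editorial addition): with maximum (complete) linkage, after
+clusters A and B are merged, the distance from the merged cluster to any other
+cluster C is the larger of the two old distances; this is what the max()
+updates in the body below compute. For instance, if d(A,C) = 2.0 and
+d(B,C) = 5.0, then d(A u B, C) = max(2.0, 5.0) = 5.0.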
+======================================================================== +*/ +{ + int j; + int n; + int* clusterid; + Node* result; + + clusterid = malloc(nelements*sizeof(int)); + if (!clusterid) return NULL; + result = malloc((nelements-1)*sizeof(Node)); + if (!result) { + free(clusterid); + return NULL; + } + + /* Setup a list specifying to which cluster a gene belongs */ + for (j = 0; j < nelements; j++) clusterid[j] = j; + + for (n = nelements; n > 1; n--) { + int is = 1; + int js = 0; + + result[nelements-n].distance = find_closest_pair(n, distmatrix, + &is, &js); + + /* Fix the distances */ + for (j = 0; j < js; j++) + distmatrix[js][j] = max(distmatrix[is][j], distmatrix[js][j]); + for (j = js+1; j < is; j++) + distmatrix[j][js] = max(distmatrix[is][j], distmatrix[j][js]); + for (j = is+1; j < n; j++) + distmatrix[j][js] = max(distmatrix[j][is], distmatrix[j][js]); + + for (j = 0; j < is; j++) distmatrix[is][j] = distmatrix[n-1][j]; + for (j = is+1; j < n-1; j++) distmatrix[j][is] = distmatrix[n-1][j]; + + /* Update clusterids */ + result[nelements-n].left = clusterid[is]; + result[nelements-n].right = clusterid[js]; + clusterid[js] = n-nelements-1; + clusterid[is] = clusterid[n-1]; + } + free(clusterid); + + return result; +} + +/* ******************************************************************* */ + +static Node* +palcluster(int nelements, double** distmatrix) +/* +Purpose +======= + +The palcluster routine performs clustering using pairwise average +linking on the given distance matrix. + +Arguments +========= + +nelements (input) int +The number of elements to be clustered. + +distmatrix (input) double** +The distance matrix, with nelements rows, each row being filled up to the +diagonal. The elements on the diagonal are not used, as they are assumed to be +zero. The distance matrix will be modified by this routine. + +Return value +============ + +A pointer to a newly allocated array of Node structs, describing the +hierarchical clustering solution consisting of nelements-1 nodes. Depending on +whether genes (rows) or samples (columns) were clustered, nelements is equal +to nrows or ncolumns. See src/cluster.h for a description of the Node +structure. +If a memory error occurs, palcluster returns NULL. +======================================================================== +*/ +{ + int j; + int n; + int* clusterid; + int* number; + Node* result; + + clusterid = malloc(nelements*sizeof(int)); + if (!clusterid) return NULL; + number = malloc(nelements*sizeof(int)); + if (!number) { + free(clusterid); + return NULL; + } + result = malloc((nelements-1)*sizeof(Node)); + if (!result) { + free(clusterid); + free(number); + return NULL; + } + + /* Setup a list specifying to which cluster a gene belongs, and keep track + * of the number of elements in each cluster (needed to calculate the + * average). 
*/ + for (j = 0; j < nelements; j++) { + number[j] = 1; + clusterid[j] = j; + } + + for (n = nelements; n > 1; n--) { + int sum; + int is = 1; + int js = 0; + result[nelements-n].distance = find_closest_pair(n, distmatrix, + &is, &js); + + /* Save result */ + result[nelements-n].left = clusterid[is]; + result[nelements-n].right = clusterid[js]; + + /* Fix the distances */ + sum = number[is] + number[js]; + for (j = 0; j < js; j++) { + distmatrix[js][j] = distmatrix[is][j]*number[is] + + distmatrix[js][j]*number[js]; + distmatrix[js][j] /= sum; + } + for (j = js+1; j < is; j++) { + distmatrix[j][js] = distmatrix[is][j]*number[is] + + distmatrix[j][js]*number[js]; + distmatrix[j][js] /= sum; + } + for (j = is+1; j < n; j++) { + distmatrix[j][js] = distmatrix[j][is]*number[is] + + distmatrix[j][js]*number[js]; + distmatrix[j][js] /= sum; + } + + for (j = 0; j < is; j++) distmatrix[is][j] = distmatrix[n-1][j]; + for (j = is+1; j < n-1; j++) distmatrix[j][is] = distmatrix[n-1][j]; + + /* Update number of elements in the clusters */ + number[js] = sum; + number[is] = number[n-1]; + + /* Update clusterids */ + clusterid[js] = n-nelements-1; + clusterid[is] = clusterid[n-1]; + } + free(clusterid); + free(number); + + return result; +} + +/* ******************************************************************* */ + +Node* +treecluster(int nrows, int ncolumns, double** data, int** mask, + double weight[], int transpose, char dist, char method, + double** distmatrix) +/* +Purpose +======= + +The treecluster routine performs hierarchical clustering using pairwise +single-, maximum-, centroid-, or average-linkage, as defined by method, on a +given set of gene expression data, using the distance metric given by dist. +If successful, the function returns a pointer to a newly allocated Tree struct +containing the hierarchical clustering solution, and NULL if a memory error +occurs. The pointer should be freed by the calling routine to prevent memory +leaks. + +Arguments +========= + +nrows (input) int +The number of rows in the data matrix, equal to the number of genes. + +ncolumns (input) int +The number of columns in the data matrix, equal to the number of samples. + +data (input) double[nrows][ncolumns] +The array containing the data of the vectors to be clustered. + +mask (input) int[nrows][ncolumns] +This array shows which data values are missing. If mask[i][j] == 0, then +data[i][j] is missing. + +weight (input) double[ncolumns] if transpose == 0, + double[nrows] otherwise +The weights that are used to calculate the distance. This is equivalent +to including the jth data point weight[j] times in the calculation. The +weights can be non-integer. + +transpose (input) int +If transpose == 0, the rows of the matrix are clustered. Otherwise, columns +of the matrix are clustered. + +dist (input) char +Defines which distance measure is used, as given by the table: +dist == 'e': Euclidean distance +dist == 'b': City-block distance +dist == 'c': correlation +dist == 'a': absolute value of the correlation +dist == 'u': uncentered correlation +dist == 'x': absolute uncentered correlation +dist == 's': Spearman's rank correlation +dist == 'k': Kendall's tau +For other values of dist, the default (Euclidean distance) is used. 
+ +method (input) char +Defines which hierarchical clustering method is used: +method == 's': pairwise single-linkage clustering +method == 'm': pairwise maximum- (or complete-) linkage clustering +method == 'a': pairwise average-linkage clustering +method == 'c': pairwise centroid-linkage clustering +For the first three, either the distance matrix or the gene expression data is +sufficient to perform the clustering algorithm. For pairwise centroid-linkage +clustering, however, the gene expression data are always needed, even if the +distance matrix itself is available. + +distmatrix (input) double** +The distance matrix. If the distance matrix is zero initially, the distance +matrix will be allocated and calculated from the data by treecluster, and +deallocated before treecluster returns. If the distance matrix is passed by the +calling routine, treecluster will modify the contents of the distance matrix as +part of the clustering algorithm, but will not deallocate it. The calling +routine should deallocate the distance matrix after the return from +treecluster. + +Return value +============ + +A pointer to a newly allocated array of Node structs, describing the +hierarchical clustering solution consisting of nelements-1 nodes. Depending on +whether genes (rows) or samples (columns) were clustered, nelements is equal +to nrows or ncolumns. See src/cluster.h for a description of the Node +structure. +If a memory error occurs, treecluster returns NULL. + +======================================================================== +*/ +{ + Node* result = NULL; + const int nelements = (transpose == 0) ? nrows : ncolumns; + const int ldistmatrix = (distmatrix == NULL && method != 's') ? 1 : 0; + + if (nelements < 2) return NULL; + + /* Calculate the distance matrix if the user didn't give it */ + if (ldistmatrix) { + /* Set up the ragged array */ + int i; + distmatrix = malloc(nelements*sizeof(double*)); + if (distmatrix == NULL) return NULL; /* Not enough memory available */ + distmatrix[0] = NULL; + for (i = 1; i < nelements; i++) { + distmatrix[i] = malloc(i*sizeof(double)); + if (distmatrix[i] == NULL) /* Not enough memory available */ { + while (--i > 0) free(distmatrix[i]); + free(distmatrix); + return NULL; + } + } + distancematrix(nrows, ncolumns, data, mask, weight, dist, transpose, + distmatrix); + } + + switch(method) { + case 's': + result = pslcluster(nrows, ncolumns, data, mask, weight, + distmatrix, dist, transpose); + break; + case 'm': + result = pmlcluster(nelements, distmatrix); + break; + case 'a': + result = palcluster(nelements, distmatrix); + break; + case 'c': + result = pclcluster(nrows, ncolumns, data, mask, weight, + distmatrix, dist, transpose); + break; + } + + /* Deallocate space for distance matrix if allocated by treecluster */ + if (ldistmatrix) { + int i; + for (i = 1; i < nelements; i++) free(distmatrix[i]); + free(distmatrix); + } + + return result; +} + +/* ******************************************************************* */ + +int +sorttree(const int nnodes, Node* tree, const double order[], int indices[]) +/* +Purpose +======= + +The sorttree routine sorts the items in a hierarchical clustering solution +based on their order values, while remaining consistent with the hierchical +clustering solution. + +Arguments +========= + +nnodes (input) int +The number of nodes in the hierarchical clustering tree. + +tree (input) Node[nnodes] +The hierarchical clustering tree describing the clustering solution. 
+ +order (input) double[nnodes+1] +The preferred order of the items. + +indices (output) int* +The indices of each item after sorting, with item i appearing at indices[i] +after sorting. + +Return value +============ + +If no errors occur, sorttree returns 1. +If a memory error occurs, sorttree returns 0. + +======================================================================== +*/ + +{ + int i; + int index; + int i1, i2; + double order1, order2; + int counts1, counts2; + int* nodecounts; + + nodecounts = malloc(nnodes*sizeof(int)); + if (!nodecounts) return 0; + if (order) { + double* nodeorder = malloc(nnodes*sizeof(double)); + if (!nodeorder) { + free(nodecounts); + return 0; + } + for (i = 0; i < nnodes; i++) { + i1 = tree[i].left; + i2 = tree[i].right; + /* i1 and i2 are the elements that are to be joined */ + if (i1 < 0) { + index = -i1-1; + order1 = nodeorder[index]; + counts1 = nodecounts[index]; + } + else { + order1 = order[i1]; + counts1 = 1; + } + if (i2 < 0) { + index = -i2-1; + order2 = nodeorder[index]; + counts2 = nodecounts[index]; + } + else { + order2 = order[i2]; + counts2 = 1; + } + if (order1 > order2) { + tree[i].left = i2; + tree[i].right = i1; + } + nodecounts[i] = counts1 + counts2; + nodeorder[i] = (counts1*order1+counts2*order2) / (counts1+counts2); + } + free(nodeorder); + } + else { + for (i = 0; i < nnodes; i++) { + i1 = tree[i].left; + i2 = tree[i].right; + /* i1 and i2 are the elements that are to be joined */ + counts1 = (i1 < 0) ? nodecounts[-i1-1] : 1; + counts2 = (i2 < 0) ? nodecounts[-i2-1] : 1; + nodecounts[i] = counts1 + counts2; + } + } + i--; + nodecounts[i] = 0; + for ( ; i >= 0; i--) { + i1 = tree[i].left; + i2 = tree[i].right; + counts1 = (i1<0) ? nodecounts[-i1-1] : 1; + index = nodecounts[i]; + if (i1 >= 0) indices[index] = i1; + else nodecounts[-i1-1] = index; + index += counts1; + if (i2 >= 0) indices[index] = i2; + else nodecounts[-i2-1] = index; + } + free(nodecounts); + return 1; +} + +/* ******************************************************************* */ + +static void +somworker(int nrows, int ncolumns, double** data, int** mask, + const double weights[], int transpose, int nxgrid, int nygrid, + double inittau, double*** celldata, int niter, char dist) + +{ + const int nelements = (transpose == 0) ? nrows : ncolumns; + const int ndata = (transpose == 0) ? 
ncolumns : nrows; + int i, j; + int** dummymask; + int ix, iy; + int* index; + int iter; + /* Maximum radius in which nodes are adjusted */ + double maxradius = sqrt(nxgrid*nxgrid+nygrid*nygrid); + double* stddata = calloc(nelements, sizeof(double)); + + /* Set the metric function as indicated by dist */ + double (*metric) (int, double**, double**, int**, int**, + const double[], int, int, int) = setmetric(dist); + + /* Calculate the standard deviation for each row or column */ + if (transpose == 0) { + for (i = 0; i < nelements; i++) { + int n = 0; + for (j = 0; j < ndata; j++) { + if (mask[i][j]) { + double term = data[i][j]; + term = term * term; + stddata[i] += term; + n++; + } + } + if (stddata[i] > 0) stddata[i] = sqrt(stddata[i]/n); + else stddata[i] = 1; + } + } + else { + for (i = 0; i < nelements; i++) { + int n = 0; + for (j = 0; j < ndata; j++) { + if (mask[j][i]) { + double term = data[j][i]; + term = term * term; + stddata[i] += term; + n++; + } + } + if (stddata[i] > 0) stddata[i] = sqrt(stddata[i]/n); + else stddata[i] = 1; + } + } + + if (transpose == 0) { + dummymask = malloc(nygrid*sizeof(int*)); + for (i = 0; i < nygrid; i++) { + dummymask[i] = malloc(ndata*sizeof(int)); + for (j = 0; j < ndata; j++) dummymask[i][j] = 1; + } + } + else { + dummymask = malloc(ndata*sizeof(int*)); + for (i = 0; i < ndata; i++) { + dummymask[i] = malloc(sizeof(int)); + dummymask[i][0] = 1; + } + } + + /* Randomly initialize the nodes */ + for (ix = 0; ix < nxgrid; ix++) { + for (iy = 0; iy < nygrid; iy++) { + double sum = 0.; + for (i = 0; i < ndata; i++) { + double term = -1.0 + 2.0*uniform(); + celldata[ix][iy][i] = term; + sum += term * term; + } + sum = sqrt(sum/ndata); + for (i = 0; i < ndata; i++) celldata[ix][iy][i] /= sum; + } + } + + /* Randomize the order in which genes or arrays will be used */ + index = malloc(nelements*sizeof(int)); + for (i = 0; i < nelements; i++) index[i] = i; + for (i = 0; i < nelements; i++) { + j = (int) (i + (nelements-i)*uniform()); + ix = index[j]; + index[j] = index[i]; + index[i] = ix; + } + + /* Start the iteration */ + for (iter = 0; iter < niter; iter++) { + int ixbest = 0; + int iybest = 0; + int iobject = iter % nelements; + iobject = index[iobject]; + if (transpose == 0) { + double closest = metric(ndata, data, celldata[ixbest], mask, + dummymask, weights, iobject, iybest, + transpose); + double radius = maxradius * (1. - ((double)iter)/((double)niter)); + double tau = inittau * (1. - ((double)iter)/((double)niter)); + + for (ix = 0; ix < nxgrid; ix++) { + for (iy = 0; iy < nygrid; iy++) { + double distance = metric(ndata, data, celldata[ix], mask, + dummymask, weights, iobject, iy, + transpose); + if (distance < closest) { + ixbest = ix; + iybest = iy; + closest = distance; + } + } + } + for (ix = 0; ix < nxgrid; ix++) { + for (iy = 0; iy < nygrid; iy++) { + if (sqrt((ix-ixbest)*(ix-ixbest)+(iy-iybest)*(iy-iybest)) < + radius) { + double sum = 0.; + for (i = 0; i < ndata; i++) { + if (mask[iobject][i] == 0) continue; + celldata[ix][iy][i] += + tau * (data[iobject][i]/stddata[iobject] + -celldata[ix][iy][i]); + } + for (i = 0; i < ndata; i++) { + double term = celldata[ix][iy][i]; + term = term * term; + sum += term; + } + if (sum>0) { + sum = sqrt(sum/ndata); + for (i = 0; i < ndata; i++) + celldata[ix][iy][i] /= sum; + } + } + } + } + } + else { + double closest; + double** celldatavector = malloc(ndata*sizeof(double*)); + double radius = maxradius * (1. - ((double)iter)/((double)niter)); + double tau = inittau * (1. 
- ((double)iter)/((double)niter)); + + for (i = 0; i < ndata; i++) + celldatavector[i] = &(celldata[ixbest][iybest][i]); + closest = metric(ndata, data, celldatavector, mask, dummymask, + weights, iobject, 0, transpose); + for (ix = 0; ix < nxgrid; ix++) { + for (iy = 0; iy < nygrid; iy++) { + double distance; + for (i = 0; i < ndata; i++) + celldatavector[i] = &(celldata[ixbest][iybest][i]); + distance = metric(ndata, data, celldatavector, mask, + dummymask, weights, iobject, 0, + transpose); + if (distance < closest) { + ixbest = ix; + iybest = iy; + closest = distance; + } + } + } + free(celldatavector); + for (ix = 0; ix < nxgrid; ix++) { + for (iy = 0; iy < nygrid; iy++) { + if (sqrt((ix-ixbest)*(ix-ixbest)+(iy-iybest)*(iy-iybest)) < + radius) { + double sum = 0.; + for (i = 0; i < ndata; i++) { + if (mask[i][iobject] == 0) continue; + celldata[ix][iy][i] += + tau * (data[i][iobject]/stddata[iobject] + -celldata[ix][iy][i]); + } + for (i = 0; i < ndata; i++) { + double term = celldata[ix][iy][i]; + term = term * term; + sum += term; + } + if (sum>0) { + sum = sqrt(sum/ndata); + for (i = 0; i < ndata; i++) + celldata[ix][iy][i] /= sum; + } + } + } + } + } + } + if (transpose == 0) + for (i = 0; i < nygrid; i++) free(dummymask[i]); + else + for (i = 0; i < ndata; i++) free(dummymask[i]); + free(dummymask); + free(stddata); + free(index); +} + +/* ******************************************************************* */ + +static void +somassign(int nrows, int ncolumns, double** data, int** mask, + const double weights[], int transpose, int nxgrid, int nygrid, + double*** celldata, char dist, int clusterid[][2]) +/* Collect clusterids */ +{ + const int ndata = (transpose == 0) ? ncolumns : nrows; + int i, j; + + /* Set the metric function as indicated by dist */ + double (*metric) (int, double**, double**, int**, int**, + const double[], int, int, int) = setmetric(dist); + + if (transpose == 0) { + int** dummymask = malloc(nygrid*sizeof(int*)); + for (i = 0; i < nygrid; i++) { + dummymask[i] = malloc(ncolumns*sizeof(int)); + for (j = 0; j < ncolumns; j++) dummymask[i][j] = 1; + } + for (i = 0; i < nrows; i++) { + int ixbest = 0; + int iybest = 0; + double closest = metric(ndata, data, celldata[ixbest], mask, + dummymask, weights, i, iybest, transpose); + int ix, iy; + for (ix = 0; ix < nxgrid; ix++) { + for (iy = 0; iy < nygrid; iy++) { + double distance = metric(ndata, data, celldata[ix], mask, + dummymask, weights, i, iy, + transpose); + if (distance < closest) { + ixbest = ix; + iybest = iy; + closest = distance; + } + } + } + clusterid[i][0] = ixbest; + clusterid[i][1] = iybest; + } + for (i = 0; i < nygrid; i++) free(dummymask[i]); + free(dummymask); + } + else { + double** celldatavector = malloc(ndata*sizeof(double*)); + int** dummymask = malloc(nrows*sizeof(int*)); + int ixbest = 0; + int iybest = 0; + for (i = 0; i < nrows; i++) { + dummymask[i] = malloc(sizeof(int)); + dummymask[i][0] = 1; + } + for (i = 0; i < ncolumns; i++) { + double closest; + int ix, iy; + for (j = 0; j < ndata; j++) + celldatavector[j] = &(celldata[ixbest][iybest][j]); + closest = metric(ndata, data, celldatavector, mask, dummymask, + weights, i, 0, transpose); + for (ix = 0; ix < nxgrid; ix++) { + for (iy = 0; iy < nygrid; iy++) { + double distance; + for (j = 0; j < ndata; j++) + celldatavector[j] = &(celldata[ix][iy][j]); + distance = metric(ndata, data, celldatavector, mask, + dummymask, weights, i, 0, transpose); + if (distance < closest) { + ixbest = ix; + iybest = iy; + closest = distance; + } + } + } 
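+            /* The exhaustive grid search above is done; (ixbest, iybest) is
+             * now the SOM node closest to item i, recorded below. */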
+            clusterid[i][0] = ixbest;
+            clusterid[i][1] = iybest;
+        }
+        free(celldatavector);
+        for (i = 0; i < nrows; i++) free(dummymask[i]);
+        free(dummymask);
+    }
+}
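+
+/* Usage sketch (editorial example, not part of the original library): calling
+ * somcluster below with a 2x2 SOM grid on a small matrix. The data, grid
+ * size, inittau, and iteration count are made up for illustration.
+ */
+#if 0
+static void example_somcluster(void)
+{
+    double r0[] = {1.0, 2.0, 3.0}, r1[] = {3.0, 2.0, 1.0},
+           r2[] = {1.1, 2.1, 2.9};
+    double* data[3] = {r0, r1, r2};
+    int m0[] = {1, 1, 1}, m1[] = {1, 1, 1}, m2[] = {1, 1, 1};
+    int* mask[3] = {m0, m1, m2};
+    double weight[3] = {1.0, 1.0, 1.0};
+    int clusterid[3][2];
+
+    /* celldata == NULL: somcluster allocates and frees the centroids itself */
+    somcluster(3, 3, data, mask, weight, 0, 2, 2, 0.02, 100, 'e',
+               NULL, clusterid);
+}
+#endif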
+
+/* ******************************************************************* */
+
+void
+somcluster(int nrows, int ncolumns, double** data, int** mask,
+    const double weight[], int transpose, int nxgrid, int nygrid,
+    double inittau, int niter, char dist, double*** celldata,
+    int clusterid[][2])
+/*
+
+Purpose
+=======
+
+The somcluster routine implements a self-organizing map (Kohonen) on a
+rectangular grid, using a given set of vectors. The distance measure to be
+used to find the similarity between genes and nodes is given by dist.
+
+Arguments
+=========
+
+nrows (input) int
+The number of rows in the data matrix, equal to the number of genes.
+
+ncolumns (input) int
+The number of columns in the data matrix, equal to the number of samples.
+
+data (input) double[nrows][ncolumns]
+The array containing the gene expression data.
+
+mask (input) int[nrows][ncolumns]
+This array shows which data values are missing. If
+mask[i][j] == 0, then data[i][j] is missing.
+
+weight (input) double[ncolumns] if transpose == 0;
+ double[nrows] otherwise
+The weights that are used to calculate the distance. This is equivalent
+to including the jth data point weight[j] times in the calculation. The
+weights can be non-integer.
+
+transpose (input) int
+If transpose == 0, the rows (genes) of the matrix are clustered. Otherwise,
+columns (samples) of the matrix are clustered.
+
+nxgrid (input) int
+The number of grid cells horizontally in the rectangular topology of clusters.
+
+nygrid (input) int
+The number of grid cells vertically in the rectangular topology of clusters.
+
+inittau (input) double
+The initial value of tau, representing the neighborhood function.
+
+niter (input) int
+The number of iterations to be performed.
+
+dist (input) char
+Defines which distance measure is used, as given by the table:
+dist == 'e': Euclidean distance
+dist == 'b': City-block distance
+dist == 'c': correlation
+dist == 'a': absolute value of the correlation
+dist == 'u': uncentered correlation
+dist == 'x': absolute uncentered correlation
+dist == 's': Spearman's rank correlation
+dist == 'k': Kendall's tau
+For other values of dist, the default (Euclidean distance) is used.
+
+celldata (output) double[nxgrid][nygrid][ncolumns] if transpose == 0;
+ double[nxgrid][nygrid][nrows] otherwise
+The gene expression data for each node (cell) in the 2D grid. This can be
+interpreted as the centroid for the cluster corresponding to that cell. If
+celldata is NULL, then the centroids are not returned. If celldata is not
+NULL, enough space should be allocated to store the centroid data before
+calling somcluster.
+
+clusterid (output) int[nrows][2] if transpose == 0;
+ int[ncolumns][2] otherwise
+For each item (gene or microarray) that is clustered, the coordinates of the
+cell in the 2D grid to which the item was assigned. If clusterid is NULL, the
+cluster assignments are not returned. If clusterid is not NULL, enough memory
+should be allocated to store the clustering information before calling
+somcluster.
+
+========================================================================
+*/
+{
+    const int nobjects = (transpose == 0) ? nrows : ncolumns;
+    const int ndata = (transpose == 0) ? ncolumns : nrows;
+    int i, j;
+    const int lcelldata = (celldata == NULL) ? 0 : 1;
+
+    if (nobjects < 2) return;
+
+    if (lcelldata == 0) {
+        celldata = malloc(nxgrid*nygrid*ndata*sizeof(double**));
+        for (i = 0; i < nxgrid; i++) {
+            celldata[i] = malloc(nygrid*ndata*sizeof(double*));
+            for (j = 0; j < nygrid; j++)
+                celldata[i][j] = malloc(ndata*sizeof(double));
+        }
+    }
+
+    somworker(nrows, ncolumns, data, mask, weight, transpose, nxgrid, nygrid,
+              inittau, celldata, niter, dist);
+    if (clusterid)
+        somassign(nrows, ncolumns, data, mask, weight, transpose,
+                  nxgrid, nygrid, celldata, dist, clusterid);
+    if (lcelldata == 0) {
+        for (i = 0; i < nxgrid; i++)
+            for (j = 0; j < nygrid; j++)
+                free(celldata[i][j]);
+        for (i = 0; i < nxgrid; i++)
+            free(celldata[i]);
+        free(celldata);
+    }
+}
+
+/* ******************************************************************** */
+
+double
+clusterdistance(int nrows, int ncolumns, double** data, int** mask,
+    double weight[], int n1, int n2, int index1[], int index2[],
+    char dist, char method, int transpose)
+
+/*
+Purpose
+=======
+
+The clusterdistance routine calculates the distance between two clusters
+containing genes or samples using the measured gene expression vectors. The
+distance between clusters, given the genes/samples in each cluster, can be
+defined in several ways. Several distance measures can be used.
+
+The routine returns the distance in double precision.
+If the parameter transpose is set to a nonzero value, the clusters are
+interpreted as clusters of samples, otherwise as clusters of genes.
+
+Arguments
+=========
+
+nrows (input) int
+The number of rows (i.e., the number of genes) in the gene expression data
+matrix.
+
+ncolumns (input) int
+The number of columns (i.e., the number of samples) in the gene expression
+data matrix.
+
+data (input) double[nrows][ncolumns]
+The array containing the data of the vectors.
+
+mask (input) int[nrows][ncolumns]
+This array shows which data values are missing. If mask[i][j] == 0, then
+data[i][j] is missing.
+
+weight (input) double[ncolumns] if transpose == 0;
+ double[nrows] otherwise
+The weights that are used to calculate the distance. This is equivalent
+to including the jth data point weight[j] times in the calculation. The
+weights can be non-integer.
+
+n1 (input) int
+The number of elements in the first cluster.
+
+n2 (input) int
+The number of elements in the second cluster.
+
+index1 (input) int[n1]
+Identifies which genes/samples belong to the first cluster.
+
+index2 (input) int[n2]
+Identifies which genes/samples belong to the second cluster.
+
+dist (input) char
+Defines which distance measure is used, as given by the table:
+dist == 'e': Euclidean distance
+dist == 'b': City-block distance
+dist == 'c': correlation
+dist == 'a': absolute value of the correlation
+dist == 'u': uncentered correlation
+dist == 'x': absolute uncentered correlation
+dist == 's': Spearman's rank correlation
+dist == 'k': Kendall's tau
+For other values of dist, the default (Euclidean distance) is used.
+
+method (input) char
+Defines how the distance between two clusters is defined, given which genes
+belong to which cluster:
+method == 'a': the distance between the arithmetic means of the two clusters
+method == 'm': the distance between the medians of the two clusters
+method == 's': the smallest pairwise distance between members of the two
+               clusters
+method == 'x': the largest pairwise distance between members of the two
+               clusters
+method == 'v': average of the pairwise distances between members of the two
+               clusters
+
+transpose (input) int
+If transpose is equal to zero, the distances between the rows are
+calculated. Otherwise, the distances between the columns are calculated.
+The former is needed when genes are being clustered; the latter is used
+when samples are being clustered.
+
+========================================================================
+*/
+{
+    /* Set the metric function as indicated by dist */
+    double (*metric) (int, double**, double**, int**, int**,
+        const double[], int, int, int) = setmetric(dist);
+
+    /* if one or both clusters are empty, return */
+    if (n1 < 1 || n2 < 1) return -1.0;
+    /* Check the indices */
+    if (transpose == 0) {
+        int i;
+        for (i = 0; i < n1; i++) {
+            int index = index1[i];
+            if (index < 0 || index >= nrows) return -1.0;
+        }
+        for (i = 0; i < n2; i++) {
+            int index = index2[i];
+            if (index < 0 || index >= nrows) return -1.0;
+        }
+    }
+    else {
+        int i;
+        for (i = 0; i < n1; i++) {
+            int index = index1[i];
+            if (index < 0 || index >= ncolumns) return -1.0;
+        }
+        for (i = 0; i < n2; i++) {
+            int index = index2[i];
+            if (index < 0 || index >= ncolumns) return -1.0;
+        }
+    }
+
+    switch (method) {
+        case 'a': {
+            /* Find the center */
+            int i, j, k;
+            if (transpose == 0) {
+                double distance;
+                double* cdata[2];
+                int* cmask[2];
+                int* count[2];
+                count[0] = calloc(ncolumns, sizeof(int));
+                count[1] = calloc(ncolumns, sizeof(int));
+                cdata[0] = calloc(ncolumns, sizeof(double));
+                cdata[1] = calloc(ncolumns, sizeof(double));
+                cmask[0] = malloc(ncolumns*sizeof(int));
+                cmask[1] = malloc(ncolumns*sizeof(int));
+                for (i = 0; i < n1; i++) {
+                    k = index1[i];
+                    for (j = 0; j < ncolumns; j++)
+                        if (mask[k][j] != 0) {
+                            cdata[0][j] = cdata[0][j] + data[k][j];
+                            count[0][j] = count[0][j] + 1;
+                        }
+                }
+                for (i = 0; i < n2; i++) {
+                    k = index2[i];
+                    for (j = 0; j < ncolumns; j++)
+                        if (mask[k][j] != 0) {
+                            cdata[1][j] = cdata[1][j] + data[k][j];
+                            count[1][j] = count[1][j] + 1;
+                        }
+                }
+                for (i = 0; i < 2; i++)
+                    for (j = 0; j < ncolumns; j++) {
+                        if (count[i][j]>0) {
+                            cdata[i][j] = cdata[i][j] / count[i][j];
+                            cmask[i][j] = 1;
+                        }
+                        else
+                            cmask[i][j] = 0;
+                    }
+                distance = metric(ncolumns, cdata, cdata, cmask, cmask, weight,
+                    0, 1, 0);
+                for (i = 0; i < 2; i++) {
+                    free(cdata[i]);
+                    free(cmask[i]);
+                    free(count[i]);
+                }
+                return distance;
+            }
+            else {
+                double distance;
+                int** count = malloc(nrows*sizeof(int*));
+                double** cdata = malloc(nrows*sizeof(double*));
+                int** cmask = malloc(nrows*sizeof(int*));
+                for (i = 0; i < nrows; i++) {
+                    count[i] = calloc(2, sizeof(int));
+                    cdata[i] = calloc(2, sizeof(double));
+                    cmask[i] = malloc(2*sizeof(int));
+                }
+                for (i = 0; i < n1; i++) {
+                    k = index1[i];
+                    for (j = 0; j < nrows; j++) {
+                        if (mask[j][k] != 0) {
+                            cdata[j][0] += data[j][k];
+                            count[j][0]++;
+                        }
+                    }
+                }
+                for (i = 0; i < n2; i++) {
+                    k = index2[i];
+                    for (j = 0; j < nrows; j++) {
+                        if (mask[j][k] != 0) {
+                            cdata[j][1] += data[j][k];
+                            count[j][1]++;
+                        }
+                    }
+                }
+                for (i = 0; i < nrows; i++)
+                    for (j = 0; j < 2; j++)
+                        if (count[i][j]>0) {
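+                            /* nonzero count: convert the accumulated sum into the centroid mean */
+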
cdata[i][j] /= count[i][j]; + cmask[i][j] = 1; + } + else + cmask[i][j] = 0; + distance = metric(nrows, cdata, cdata, cmask, cmask, weight, + 0, 1, 1); + for (i = 0; i < nrows; i++) { + free(count[i]); + free(cdata[i]); + free(cmask[i]); + } + free(count); + free(cdata); + free(cmask); + return distance; + } + } + case 'm': { + int i, j, k; + if (transpose == 0) { + double distance; + double* temp = malloc(nrows*sizeof(double)); + double* cdata[2]; + int* cmask[2]; + for (i = 0; i < 2; i++) { + cdata[i] = malloc(ncolumns*sizeof(double)); + cmask[i] = malloc(ncolumns*sizeof(int)); + } + for (j = 0; j < ncolumns; j++) { + int count = 0; + for (k = 0; k < n1; k++) { + i = index1[k]; + if (mask[i][j]) { + temp[count] = data[i][j]; + count++; + } + } + if (count>0) { + cdata[0][j] = median(count, temp); + cmask[0][j] = 1; + } + else { + cdata[0][j] = 0.; + cmask[0][j] = 0; + } + } + for (j = 0; j < ncolumns; j++) { + int count = 0; + for (k = 0; k < n2; k++) { + i = index2[k]; + if (mask[i][j]) { + temp[count] = data[i][j]; + count++; + } + } + if (count>0) { + cdata[1][j] = median(count, temp); + cmask[1][j] = 1; + } + else { + cdata[1][j] = 0.; + cmask[1][j] = 0; + } + } + distance = metric(ncolumns, cdata, cdata, cmask, cmask, weight, + 0, 1, 0); + for (i = 0; i < 2; i++) { + free(cdata[i]); + free(cmask[i]); + } + free(temp); + return distance; + } + else { + double distance; + double* temp = malloc(ncolumns*sizeof(double)); + double** cdata = malloc(nrows*sizeof(double*)); + int** cmask = malloc(nrows*sizeof(int*)); + for (i = 0; i < nrows; i++) { + cdata[i] = malloc(2*sizeof(double)); + cmask[i] = malloc(2*sizeof(int)); + } + for (j = 0; j < nrows; j++) { + int count = 0; + for (k = 0; k < n1; k++) { + i = index1[k]; + if (mask[j][i]) { + temp[count] = data[j][i]; + count++; + } + } + if (count>0) { + cdata[j][0] = median(count, temp); + cmask[j][0] = 1; + } + else { + cdata[j][0] = 0.; + cmask[j][0] = 0; + } + } + for (j = 0; j < nrows; j++) { + int count = 0; + for (k = 0; k < n2; k++) { + i = index2[k]; + if (mask[j][i]) { + temp[count] = data[j][i]; + count++; + } + } + if (count>0) { + cdata[j][1] = median(count, temp); + cmask[j][1] = 1; + } + else { + cdata[j][1] = 0.; + cmask[j][1] = 0; + } + } + distance = metric(nrows, cdata, cdata, cmask, cmask, weight, + 0, 1, 1); + for (i = 0; i < nrows; i++) { + free(cdata[i]); + free(cmask[i]); + } + free(cdata); + free(cmask); + free(temp); + return distance; + } + } + case 's': { + int i1, i2, j1, j2; + const int n = (transpose == 0) ? ncolumns : nrows; + double mindistance = DBL_MAX; + for (i1 = 0; i1 < n1; i1++) + for (i2 = 0; i2 < n2; i2++) { + double distance; + j1 = index1[i1]; + j2 = index2[i2]; + distance = metric(n, data, data, mask, mask, weight, + j1, j2, transpose); + if (distance < mindistance) mindistance = distance; + } + return mindistance; + } + case 'x': { + int i1, i2, j1, j2; + const int n = (transpose == 0) ? ncolumns : nrows; + double maxdistance = 0; + for (i1 = 0; i1 < n1; i1++) + for (i2 = 0; i2 < n2; i2++) { + double distance; + j1 = index1[i1]; + j2 = index2[i2]; + distance = metric(n, data, data, mask, mask, weight, + j1, j2, transpose); + if (distance > maxdistance) maxdistance = distance; + } + return maxdistance; + } + case 'v': { + int i1, i2, j1, j2; + const int n = (transpose == 0) ? 
ncolumns : nrows; + double distance = 0; + for (i1 = 0; i1 < n1; i1++) + for (i2 = 0; i2 < n2; i2++) { + j1 = index1[i1]; + j2 = index2[i2]; + distance += metric(n, data, data, mask, mask, weight, + j1, j2, transpose); + } + distance /= (n1*n2); + return distance; + } + } + /* Never get here */ + return -2.0; +} diff --git a/code/lib/Bio/Cluster/cluster.h b/code/lib/Bio/Cluster/cluster.h new file mode 100644 index 0000000..fbbfd26 --- /dev/null +++ b/code/lib/Bio/Cluster/cluster.h @@ -0,0 +1,90 @@ +/******************************************************************************/ +/* The C Clustering Library. + * Copyright (C) 2002 Michiel Jan Laurens de Hoon. + * + * This library was written at the Laboratory of DNA Information Analysis, + * Human Genome Center, Institute of Medical Science, University of Tokyo, + * 4-6-1 Shirokanedai, Minato-ku, Tokyo 108-8639, Japan. + * Contact: michiel.dehoon 'AT' riken.jp + * + * Permission to use, copy, modify, and distribute this software and its + * documentation with or without modifications and for any purpose and + * without fee is hereby granted, provided that any copyright notices + * appear in all copies and that both those copyright notices and this + * permission notice appear in supporting documentation, and that the + * names of the contributors or copyright holders not be used in + * advertising or publicity pertaining to distribution of the software + * without specific prior permission. + * + * THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL + * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE + * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT + * OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS + * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE + * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE + * OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#ifndef min +#define min(x, y) ((x) < (y) ? (x) : (y)) +#endif +#ifndef max +#define max(x, y) ((x) > (y) ? (x) : (y)) +#endif + +#define CLUSTERVERSION "1.59" + +/* Chapter 2 */ +double clusterdistance(int nrows, int ncolumns, double** data, int** mask, + double weight[], int n1, int n2, int index1[], int index2[], char dist, + char method, int transpose); +void distancematrix(int ngenes, int ndata, double** data, int** mask, + double* weight, char dist, int transpose, double** distances); + +/* Chapter 3 */ +int getclustercentroids(int nclusters, int nrows, int ncolumns, + double** data, int** mask, int clusterid[], double** cdata, int** cmask, + int transpose, char method); +void getclustermedoids(int nclusters, int nelements, double** distance, + int clusterid[], int centroids[], double errors[]); +void kcluster(int nclusters, int ngenes, int ndata, double** data, + int** mask, double weight[], int transpose, int npass, char method, char dist, + int clusterid[], double* error, int* ifound); +void kmedoids(int nclusters, int nelements, double** distance, + int npass, int clusterid[], double* error, int* ifound); + +/* Chapter 4 */ +typedef struct {int left; int right; double distance;} Node; +/* + * A Node struct describes a single node in a tree created by hierarchical + * clustering. The tree can be represented by an array of n Node structs, + * where n is the number of elements minus one. 
The integers left and right
+ * in each Node struct refer to the two elements or subnodes that are joined
+ * in this node. The original elements are numbered 0..nelements-1, and the
+ * nodes -1..-(nelements-1). For each node, distance contains the distance
+ * between the two subnodes that were joined.
+ */
+
+Node* treecluster(int nrows, int ncolumns, double** data, int** mask,
+    double weight[], int transpose, char dist, char method, double** distmatrix);
+int sorttree(const int nnodes, Node* tree, const double order[], int indices[]);
+int cuttree(int nelements, const Node* tree, int nclusters, int clusterid[]);
+
+/* Chapter 5 */
+void somcluster(int nrows, int ncolumns, double** data, int** mask,
+    const double weight[], int transpose, int nxnodes, int nynodes,
+    double inittau, int niter, char dist, double*** celldata,
+    int clusterid[][2]);
+
+/* Chapter 6 */
+int pca(int m, int n, double** u, double** v, double* w);
+
+/* Utility routines, currently undocumented */
+void sort(int n, const double data[], int index[]);
+double mean(int n, double x[]);
+double median(int n, double x[]);
+
+double* calculate_weights(int nrows, int ncolumns, double** data, int** mask,
+    double weights[], int transpose, char dist, double cutoff, double exponent);
diff --git a/code/lib/Bio/Cluster/clustermodule.c b/code/lib/Bio/Cluster/clustermodule.c
new file mode 100644
index 0000000..29b2a5c
--- /dev/null
+++ b/code/lib/Bio/Cluster/clustermodule.c
@@ -0,0 +1,2457 @@
+#include "Python.h"
+#include <math.h>   /* sqrt */
+#include <float.h>  /* DBL_MIN */
+#include <string.h> /* strchr, strcat */
+#include "cluster.h"
+
+
+/* ========================================================================= */
+/* -- Helper routines ------------------------------------------------------ */
+/* ========================================================================= */
+
+static char
+extract_single_character(PyObject* object, const char variable[],
+    const char allowed[])
+{
+    Py_UCS4 ch;
+    Py_ssize_t n;
+    if (!PyUnicode_Check(object)) {
+        PyErr_Format(PyExc_ValueError, "%s should be a string", variable);
+        return 0;
+    }
+    if (PyUnicode_READY(object) == -1) return 0;
+    n = PyUnicode_GET_LENGTH(object);
+    if (n != 1) {
+        PyErr_Format(PyExc_ValueError,
+            "%s should be a single character", variable);
+        return 0;
+    }
+    ch = PyUnicode_READ_CHAR(object, 0);
+    if (ch < 128) {
+        const char c = ch;
+        if (strchr(allowed, c)) return c;
+    }
+    PyErr_Format(PyExc_ValueError,
+        "unknown %s function specified (should be one of '%s')",
+        variable, allowed);
+    return 0;
+}
+
+static int
+distance_converter(PyObject* object, void* pointer)
+{
+    char c;
+
+    c = extract_single_character(object, "dist", "ebcauxsk");
+    if (c == 0) return 0;
+    *((char*)pointer) = c;
+    return 1;
+}
+
+static int
+method_treecluster_converter(PyObject* object, void* pointer)
+{
+    char c;
+
+    c = extract_single_character(object, "method", "csma");
+    if (c == 0) return 0;
+    *((char*)pointer) = c;
+    return 1;
+}
+
+static int
+method_kcluster_converter(PyObject* object, void* pointer)
+{
+    char c;
+
+    c = extract_single_character(object, "method", "am");
+    if (c == 0) return 0;
+    *((char*)pointer) = c;
+    return 1;
+}
+
+static int
+method_clusterdistance_converter(PyObject* object, void* pointer)
+{
+    char c;
+
+    c = extract_single_character(object, "method", "amsxv");
+    if (c == 0) return 0;
+    *((char*)pointer) = c;
+    return 1;
+}
+
+/* -- data ----------------------------------------------------------------- */
+
+typedef struct {
+    int nrows;
+    int ncols;
+    double** values;
+    Py_buffer view;
+} Data;
+
+static int
+data_converter(PyObject* object, void* pointer) +{ + Data* data = pointer; + int nrows; + int ncols; + int i; + double** values = data->values; + Py_buffer* view = &data->view; + const char* p; + Py_ssize_t stride; + const int flag = PyBUF_ND | PyBUF_STRIDES; + + if (object == NULL) goto exit; + if (object == Py_None) return 1; + + if (PyObject_GetBuffer(object, view, flag) == -1) { + PyErr_SetString(PyExc_RuntimeError, + "data matrix has unexpected format."); + return 0; + } + + if (view->ndim != 2) { + PyErr_Format(PyExc_RuntimeError, + "data matrix has incorrect rank %d (expected 2)", + view->ndim); + goto exit; + } + if (view->itemsize != sizeof(double)) { + PyErr_SetString(PyExc_RuntimeError, + "data matrix has incorrect data type"); + goto exit; + } + nrows = (int) view->shape[0]; + ncols = (int) view->shape[1]; + if (nrows != view->shape[0] || ncols != view->shape[1]) { + PyErr_Format(PyExc_ValueError, + "data matrix is too large (dimensions = %zd x %zd)", + view->shape[0], view->shape[1]); + goto exit; + } + if (nrows < 1 || ncols < 1) { + PyErr_SetString(PyExc_ValueError, "data matrix is empty"); + goto exit; + } + stride = view->strides[0]; + if (view->strides[1] != view->itemsize) { + PyErr_SetString(PyExc_RuntimeError, "data is not contiguous"); + goto exit; + } + values = PyMem_Malloc(nrows*sizeof(double*)); + if (!values) { + PyErr_NoMemory(); + goto exit; + } + for (i = 0, p = view->buf; i < nrows; i++, p += stride) + values[i] = (double*)p; + data->values = values; + data->nrows = nrows; + data->ncols = ncols; + return Py_CLEANUP_SUPPORTED; + +exit: + if (values) PyMem_Free(values); + PyBuffer_Release(view); + return 0; +} + +/* -- mask ----------------------------------------------------------------- */ + +typedef struct { + int** values; + Py_buffer view; +} Mask; + +static int +mask_converter(PyObject* object, void* pointer) +{ + Mask* mask = pointer; + int nrows; + int ncols; + int i; + int** values = mask->values; + Py_buffer* view = &mask->view; + const char* p; + Py_ssize_t stride; + const int flag = PyBUF_ND | PyBUF_STRIDES; + + if (object == NULL) goto exit; + if (object == Py_None) return 1; + + if (PyObject_GetBuffer(object, view, flag) == -1) { + PyErr_SetString(PyExc_RuntimeError, "mask has unexpected format."); + return 0; + } + + if (view->ndim != 2) { + PyErr_Format(PyExc_ValueError, + "mask has incorrect rank %d (expected 2)", view->ndim); + goto exit; + } + if (view->itemsize != sizeof(int)) { + PyErr_SetString(PyExc_RuntimeError, "mask has incorrect data type"); + goto exit; + } + nrows = (int) view->shape[0]; + ncols = (int) view->shape[1]; + if (nrows != view->shape[0] || ncols != view->shape[1]) { + PyErr_Format(PyExc_ValueError, + "mask is too large (dimensions = %zd x %zd)", + view->shape[0], view->shape[1]); + goto exit; + } + stride = view->strides[0]; + if (view->strides[1] != view->itemsize) { + PyErr_SetString(PyExc_RuntimeError, "mask is not contiguous"); + goto exit; + } + values = PyMem_Malloc(nrows*sizeof(int*)); + if (!values) { + PyErr_NoMemory(); + goto exit; + } + for (i = 0, p = view->buf; i < nrows; i++, p += stride) + values[i] = (int*)p; + mask->values = values; + return Py_CLEANUP_SUPPORTED; + +exit: + if (values) PyMem_Free(values); + PyBuffer_Release(view); + return 0; +} + +/* -- 1d array ------------------------------------------------------------- */ + +static int +vector_converter(PyObject* object, void* pointer) +{ + Py_buffer* view = pointer; + int ndata; + const int flag = PyBUF_ND | PyBUF_C_CONTIGUOUS; + + if (object == 
NULL) goto exit;
+
+    if (PyObject_GetBuffer(object, view, flag) == -1) {
+        PyErr_SetString(PyExc_RuntimeError, "unexpected format.");
+        return 0;
+    }
+
+    if (view->ndim != 1) {
+        PyErr_Format(PyExc_ValueError, "incorrect rank %d (expected 1)",
+            view->ndim);
+        goto exit;
+    }
+    if (view->itemsize != sizeof(double)) {
+        PyErr_SetString(PyExc_RuntimeError, "array has incorrect data type");
+        goto exit;
+    }
+    ndata = (int) view->shape[0];
+    if (ndata != view->shape[0]) {
+        PyErr_Format(PyExc_ValueError,
+            "array is too large (size = %zd)", view->shape[0]);
+        goto exit;
+    }
+    return Py_CLEANUP_SUPPORTED;
+
+exit:
+    PyBuffer_Release(view);
+    return 0;
+}
+
+static int
+vector_none_converter(PyObject* object, void* pointer)
+{
+    if (object == Py_None) return 1;
+    return vector_converter(object, pointer);
+}
+
+/* -- clusterid ------------------------------------------------------------ */
+
+static int
+check_clusterid(Py_buffer clusterid, int nitems) {
+    int i, j;
+    int *p = clusterid.buf;
+    int nclusters = 0;
+    int* number;
+
+    if (nitems != clusterid.shape[0]) {
+        PyErr_Format(PyExc_ValueError, "incorrect size (%zd, expected %d)",
+            clusterid.shape[0], nitems);
+        return 0;
+    }
+    for (i = 0; i < nitems; i++) {
+        j = p[i];
+        if (j > nclusters) nclusters = j;
+        if (j < 0) {
+            PyErr_SetString(PyExc_ValueError, "negative cluster number found");
+            return 0;
+        }
+    }
+    nclusters++;
+    /* -- Count the number of items in each cluster --------------------- */
+    number = calloc(nclusters, sizeof(int));
+    if (!number) {
+        PyErr_NoMemory();
+        return 0;
+    }
+    for (i = 0; i < nitems; i++) {
+        j = p[i];
+        number[j]++;
+    }
+    for (j = 0; j < nclusters; j++) if (number[j] == 0) break;
+    free(number); /* allocated with calloc, so release with free */
+    if (j < nclusters) {
+        PyErr_Format(PyExc_ValueError, "cluster %d is empty", j);
+        return 0;
+    }
+    return nclusters;
+}
+
+/* -- distance ----------------------------------------------------------- */
+
+typedef struct {
+    int n;
+    double** values;
+    Py_buffer* views;
+    Py_buffer view;
+} Distancematrix;
+
+static int
+_convert_list_to_distancematrix(PyObject* list, Distancematrix* distances)
+{
+    int i;
+    double** values;
+    Py_buffer* view;
+    Py_buffer* views;
+    const int flag = PyBUF_ND | PyBUF_C_CONTIGUOUS;
+    const int n = (int) PyList_GET_SIZE(list);
+
+    if (n != PyList_GET_SIZE(list)) {
+        PyErr_SetString(PyExc_ValueError, "distance matrix is too large");
+        return 0;
+    }
+    values = PyMem_Malloc(n*sizeof(double*));
+    if (!values) {
+        PyErr_NoMemory();
+        return 0;
+    }
+    distances->values = values;
+    views = PyMem_Malloc(n*sizeof(Py_buffer));
+    if (!views) {
+        PyErr_NoMemory();
+        return 0;
+    }
+    view = views;
+    for (i = 0; i < n; i++, view++) {
+        PyObject* item = PyList_GET_ITEM(list, i);
+        view->len = -1;
+        if (PyObject_GetBuffer(item, view, flag) == -1) {
+            PyErr_Format(PyExc_RuntimeError, "failed to parse row %d.", i);
+            view--;
+            break;
+        }
+        if (view->ndim != 1) {
+            PyErr_Format(PyExc_ValueError,
+                "row %d has incorrect rank (%d, expected 1)",
+                i, view->ndim);
+            break;
+        }
+        if (view->itemsize != sizeof(double)) {
+            PyErr_Format(PyExc_RuntimeError,
+                "row %d has incorrect data type", i);
+            break;
+        }
+        if (view->shape[0] != i) {
+            PyErr_Format(PyExc_RuntimeError,
+                "row %d has incorrect size %zd (expected %d)",
+                i, view->shape[0], i);
+            break;
+        }
+        values[i] = view->buf;
+    }
+    if (i < n) {
+        for ( ; view >= views; view--) PyBuffer_Release(view);
+        PyMem_Free(views);
+        return 0;
+    }
+    distances->n = n;
+    distances->view.len = 0;
+    distances->views = views;
+    distances->values = values;
+    return 1;
+}
+
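+/*
+ * For illustration (not part of the upstream library): both distance-matrix
+ * converters accept the same strict lower-triangular storage convention.
+ * For n = 4 elements the expected layout is
+ *
+ *     row 0:  (empty)
+ *     row 1:  d(1,0)
+ *     row 2:  d(2,0) d(2,1)
+ *     row 3:  d(3,0) d(3,1) d(3,2)
+ *
+ * i.e. row i of a list argument carries exactly i doubles (checked above via
+ * view->shape[0] != i), while a flat 1D buffer concatenates these rows into
+ * n*(n-1)/2 values (checked below via n*n-n != 2*m).
+ */
+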
+static int +_convert_array_to_distancematrix(PyObject* array, Distancematrix* distances) +{ + int i; + int n; + double** values; + double* p; + Py_buffer* view = &distances->view; + const int flag = PyBUF_ND | PyBUF_C_CONTIGUOUS; + + if (PyObject_GetBuffer(array, view, flag) == -1) { + PyErr_SetString(PyExc_RuntimeError, + "distance matrix has unexpected format."); + return 0; + } + + if (view->len == 0) { + PyBuffer_Release(view); + PyErr_SetString(PyExc_ValueError, "distance matrix is empty"); + return 0; + } + if (view->itemsize != sizeof(double)) { + PyErr_SetString(PyExc_RuntimeError, + "distance matrix has an incorrect data type"); + return 0; + } + if (view->ndim == 1) { + int m = (int) view->shape[0]; + if (m != view->shape[0]) { + PyErr_Format(PyExc_ValueError, + "distance matrix is too large (size = %zd)", + view->shape[0]); + return 0; + } + n = (int)(1+sqrt(1+8*m)/2); /* rounds to (1+sqrt(1+8*m))/2 */ + if (n*n-n != 2 * m) { + PyErr_SetString(PyExc_ValueError, + "distance matrix has unexpected size."); + return 0; + } + distances->n = n; + values = PyMem_Malloc(n*sizeof(double*)); + if (!values) { + PyErr_NoMemory(); + return 0; + } + distances->values = values; + for (p = view->buf, i = 0; i < n; p += i, i++) values[i] = p; + } + else if (view->ndim == 2) { + n = (int) view->shape[0]; + if (n != view->shape[0]) { + PyErr_Format(PyExc_ValueError, + "distance matrix is too large (size = %zd)", + view->shape[0]); + return 0; + } + distances->n = n; + if (view->shape[1] != n) { + PyErr_SetString(PyExc_ValueError, + "distance matrix is not square."); + return 0; + } + values = PyMem_Malloc(n*sizeof(double*)); + if (!values) { + PyErr_NoMemory(); + return 0; + } + distances->values = values; + for (p = view->buf, i = 0; i < n; p += n, i++) values[i] = p; + } + else { + PyErr_Format(PyExc_ValueError, + "distance matrix has incorrect rank %d (expected 1 or 2)", + view->ndim); + return 0; + } + return 1; +} + +static int +distancematrix_converter(PyObject* argument, void* pointer) +{ + Distancematrix* distances = pointer; + double** values; + + if (argument == NULL) goto exit; + if (argument == Py_None) return 1; + if (PyList_Check(argument)) { + if (_convert_list_to_distancematrix(argument, distances)) + return Py_CLEANUP_SUPPORTED; + } + else { + if (_convert_array_to_distancematrix(argument, distances)) + return Py_CLEANUP_SUPPORTED; + } + +exit: + values = distances->values; + if (values == NULL) return 0; + else { + int i; + const int n = distances->n; + Py_buffer* views = distances->views; + if (views) { + for (i = 0; i < n; i++) PyBuffer_Release(&views[i]); + PyMem_Free(views); + } + else if (distances->view.len) { + PyBuffer_Release(&distances->view); + } + PyMem_Free(values); + } + return 0; +} + +/* -- celldata ------------------------------------------------------------- */ + +typedef struct { + int nx; + int ny; + int nz; + double*** values; + Py_buffer view; +} Celldata; + +static int +celldata_converter(PyObject* argument, void* pointer) +{ + int i, n; + double* p; + Celldata* celldata = pointer; + double*** ppp = celldata->values; + double** pp = ppp ? 
ppp[0] : NULL; + int nx; + int ny; + int nz; + Py_buffer* view = &celldata->view; + const int flag = PyBUF_ND | PyBUF_C_CONTIGUOUS; + + if (argument == NULL) goto exit; + + if (PyObject_GetBuffer(argument, view, flag) == -1) { + PyErr_SetString(PyExc_RuntimeError, + "celldata array has unexpected format."); + return 0; + } + + nx = (int) view->shape[0]; + ny = (int) view->shape[1]; + nz = (int) view->shape[2]; + if (nx != view->shape[0] || ny != view->shape[1] || nz != view->shape[2]) { + PyErr_SetString(PyExc_RuntimeError, "celldata array too large"); + goto exit; + } + if (view->itemsize != sizeof(double)) { + PyErr_SetString(PyExc_RuntimeError, + "celldata array has incorrect data type"); + goto exit; + } + pp = PyMem_Malloc(nx*ny*sizeof(double*)); + ppp = PyMem_Malloc(nx*sizeof(double**)); + if (!pp || !ppp) { + PyErr_NoMemory(); + goto exit; + } + p = view->buf; + n = nx * ny; + for (i = 0; i < n; i++, p += nz) pp[i] = p; + for (i = 0; i < nx; i++, pp += ny) ppp[i] = pp; + celldata->values = ppp; + celldata->nx = nx; + celldata->ny = ny; + celldata->nz = nz; + return Py_CLEANUP_SUPPORTED; + +exit: + if (pp) PyMem_Free(pp); + if (ppp) PyMem_Free(ppp); + PyBuffer_Release(view); + return 0; +} + + +/* -- index ---------------------------------------------------------------- */ + +static int +index_converter(PyObject* argument, void* pointer) +{ + Py_buffer* view = pointer; + int n; + const int flag = PyBUF_ND | PyBUF_C_CONTIGUOUS; + + if (argument == NULL) goto exit; + + if (PyObject_GetBuffer(argument, view, flag) == -1) { + PyErr_SetString(PyExc_RuntimeError, "unexpected format."); + return 0; + } + + if (view->ndim != 1) { + PyErr_Format(PyExc_ValueError, "incorrect rank %d (expected 1)", + view->ndim); + goto exit; + } + if (view->itemsize != sizeof(int)) { + PyErr_SetString(PyExc_RuntimeError, + "argument has incorrect data type"); + goto exit; + } + n = (int) view->shape[0]; + if (n != view->shape[0]) { + PyErr_Format(PyExc_ValueError, + "array size is too large (size = %zd)", view->shape[0]); + goto exit; + } + return Py_CLEANUP_SUPPORTED; + +exit: + PyBuffer_Release(view); + return 0; +} + +/* -- index2d ------------------------------------------------------------- */ + +static int +index2d_converter(PyObject* argument, void* pointer) +{ + Py_buffer* view = pointer; + int n; + const int flag = PyBUF_ND | PyBUF_C_CONTIGUOUS; + + if (argument == NULL) goto exit; + + if (PyObject_GetBuffer(argument, view, flag) == -1) { + PyErr_SetString(PyExc_RuntimeError, "unexpected format."); + return 0; + } + + if (view->ndim != 2) { + PyErr_Format(PyExc_ValueError, "incorrect rank %d (expected 2)", + view->ndim); + goto exit; + } + if (view->itemsize != sizeof(int)) { + PyErr_SetString(PyExc_RuntimeError, + "argument has incorrect data type"); + goto exit; + } + n = (int) view->shape[0]; + if (n != view->shape[0]) { + PyErr_Format(PyExc_ValueError, + "array size is too large (size = %zd)", view->shape[0]); + goto exit; + } + if (view->shape[1] != 2) { + PyErr_Format(PyExc_ValueError, + "array has %zd columns (expected 2)", view->shape[1]); + goto exit; + } + return Py_CLEANUP_SUPPORTED; + +exit: + PyBuffer_Release(view); + return 0; +} + +/* ========================================================================= */ +/* -- Classes -------------------------------------------------------------- */ +/* ========================================================================= */ + +typedef struct { + PyObject_HEAD + Node node; +} PyNode; + +static int +PyNode_init(PyNode *self, PyObject *args, 
PyObject *kwds) +{ + int left, right; + double distance = 0.0; + static char *kwlist[] = {"left", "right", "distance", NULL}; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "ii|d", kwlist, + &left, &right, &distance)) + return -1; + self->node.left = left; + self->node.right = right; + self->node.distance = distance; + return 0; +} + +static PyObject* +PyNode_repr(PyNode* self) +{ + char string[64]; + + sprintf(string, "(%d, %d): %g", + self->node.left, self->node.right, self->node.distance); + return PyUnicode_FromString(string); +} + +static char PyNode_left__doc__[] = +"integer representing the first member of this node"; + +static PyObject* +PyNode_getleft(PyNode* self, void* closure) +{ + int left = self->node.left; + + return PyLong_FromLong((long)left); +} + +static int +PyNode_setleft(PyNode* self, PyObject* value, void* closure) +{ + long left = PyLong_AsLong(value); + + if (PyErr_Occurred()) return -1; + self->node.left = (int) left; + return 0; +} + +static char PyNode_right__doc__[] = +"integer representing the second member of this node"; + +static PyObject* +PyNode_getright(PyNode* self, void* closure) +{ + int right = self->node.right; + + return PyLong_FromLong((long)right); +} + +static int +PyNode_setright(PyNode* self, PyObject* value, void* closure) +{ + long right = PyLong_AsLong(value); + + if (PyErr_Occurred()) return -1; + self->node.right = (int) right; + return 0; +} + +static PyObject* +PyNode_getdistance(PyNode* self, void* closure) +{ + return PyFloat_FromDouble(self->node.distance); +} + +static int +PyNode_setdistance(PyNode* self, PyObject* value, void* closure) +{ + const double distance = PyFloat_AsDouble(value); + + if (PyErr_Occurred()) return -1; + self->node.distance = distance; + return 0; +} + +static char PyNode_distance__doc__[] = +"the distance between the two members of this node\n"; + +static PyGetSetDef PyNode_getset[] = { + {"left", + (getter)PyNode_getleft, + (setter)PyNode_setleft, + PyNode_left__doc__, NULL}, + {"right", + (getter)PyNode_getright, + (setter)PyNode_setright, + PyNode_right__doc__, NULL}, + {"distance", + (getter)PyNode_getdistance, + (setter)PyNode_setdistance, + PyNode_distance__doc__, NULL}, + {NULL} /* Sentinel */ +}; + +static char PyNode_doc[] = +"A Node object describes a single node in a hierarchical clustering tree.\n" +"The integer attributes 'left' and 'right' represent the two members that\n" +"make up this node; the floating point attribute 'distance' contains the\n" +"distance between the two members of this node.\n"; + +static PyTypeObject PyNodeType = { + PyVarObject_HEAD_INIT(NULL, 0) + "_cluster.Node", /* tp_name */ + sizeof(PyNode), /* tp_basicsize */ + 0, /* tp_itemsize */ + 0, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + (reprfunc)PyNode_repr, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ + PyNode_doc, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + 0, /* tp_members */ + PyNode_getset, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)PyNode_init, /* tp_init */ +}; + +typedef struct { + PyObject_HEAD + Node* 
nodes;
+    int n;
+} PyTree;
+
+static void
+PyTree_dealloc(PyTree* self)
+{
+    if (self->n) PyMem_Free(self->nodes);
+    Py_TYPE(self)->tp_free((PyObject*)self);
+}
+
+static PyObject*
+PyTree_new(PyTypeObject *type, PyObject* args, PyObject* kwds)
+{
+    int i, j;
+    int n;
+    Node* nodes;
+    PyObject* arg = NULL;
+    int* flag;
+    PyTree* self;
+
+    self = (PyTree *)type->tp_alloc(type, 0);
+    if (!self) return NULL;
+
+    if (!PyArg_ParseTuple(args, "|O", &arg)) {
+        Py_DECREF(self);
+        return NULL;
+    }
+
+    if (arg == NULL) {
+        self->n = 0;
+        self->nodes = NULL;
+        return (PyObject*)self;
+    }
+
+    if (!PyList_Check(arg)) {
+        Py_DECREF(self);
+        PyErr_SetString(PyExc_TypeError,
+            "Argument should be a list of Node objects");
+        return NULL;
+    }
+
+    n = (int) PyList_GET_SIZE(arg);
+    if (n != PyList_GET_SIZE(arg)) {
+        Py_DECREF(self);
+        PyErr_Format(PyExc_ValueError,
+            "List is too large (size = %zd)", PyList_GET_SIZE(arg));
+        return NULL;
+    }
+    if (n < 1) {
+        Py_DECREF(self);
+        PyErr_SetString(PyExc_ValueError, "List is empty");
+        return NULL;
+    }
+    nodes = PyMem_Malloc(n*sizeof(Node));
+    if (!nodes) {
+        Py_DECREF(self);
+        return PyErr_NoMemory();
+    }
+    for (i = 0; i < n; i++) {
+        PyNode* p;
+        PyObject* row = PyList_GET_ITEM(arg, i);
+        if (!PyType_IsSubtype(Py_TYPE(row), &PyNodeType)) {
+            PyMem_Free(nodes);
+            Py_DECREF(self);
+            PyErr_Format(PyExc_TypeError,
+                "Row %d in list is not a Node object", i);
+            return NULL;
+        }
+        p = (PyNode*)row;
+        nodes[i] = p->node;
+    }
+    /* --- Check if this is a bona fide tree ------------------------------- */
+    flag = PyMem_Malloc((2*n+1)*sizeof(int));
+    if (!flag) {
+        PyMem_Free(nodes);
+        Py_DECREF(self);
+        return PyErr_NoMemory();
+    }
+    for (i = 0; i < 2*n+1; i++) flag[i] = 0;
+    for (i = 0; i < n; i++) {
+        j = nodes[i].left;
+        if (j < 0) {
+            j = -j-1;
+            if (j >= i) break;
+        }
+        else j += n;
+        if (flag[j]) break;
+        flag[j] = 1;
+        j = nodes[i].right;
+        if (j < 0) {
+            j = -j-1;
+            if (j >= i) break;
+        }
+        else j += n;
+        if (flag[j]) break;
+        flag[j] = 1;
+    }
+    PyMem_Free(flag);
+    if (i < n) {
+        /* break encountered */
+        PyMem_Free(nodes);
+        Py_DECREF(self);
+        PyErr_SetString(PyExc_ValueError, "Inconsistent tree");
+        return NULL;
+    }
+    self->n = n;
+    self->nodes = nodes;
+    return (PyObject*)self;
+}
+
+static PyObject*
+PyTree_str(PyTree* self)
+{
+    int i;
+    const int n = self->n;
+    char string[128];
+    Node node;
+    PyObject* line;
+    PyObject* output;
+    PyObject* temp;
+
+    output = PyUnicode_FromString("");
+    for (i = 0; i < n; i++) {
+        node = self->nodes[i];
+        sprintf(string, "(%d, %d): %g", node.left, node.right, node.distance);
+        if (i < n-1) strcat(string, "\n");
+        line = PyUnicode_FromString(string);
+        if (!line) {
+            Py_DECREF(output);
+            return NULL;
+        }
+        temp = PyUnicode_Concat(output, line);
+        Py_DECREF(line);   /* drop the row string once it has been concatenated */
+        Py_DECREF(output); /* drop the old accumulator so it does not leak */
+        if (!temp) return NULL;
+        output = temp;
+    }
+    return output;
+}
+
+static int
+PyTree_length(PyTree *self)
+{
+    return self->n;
+}
+
+static PyObject*
+PyTree_subscript(PyTree* self, PyObject* item)
+{
+    if (PyIndex_Check(item)) {
+        PyNode* result;
+        Py_ssize_t i;
+        i = PyNumber_AsSsize_t(item, PyExc_IndexError);
+        if (i == -1 && PyErr_Occurred())
+            return NULL;
+        if (i < 0)
+            i += self->n;
+        if (i < 0 || i >= self->n) {
+            PyErr_SetString(PyExc_IndexError, "tree index out of range");
+            return NULL;
+        }
+        result = (PyNode*) PyNodeType.tp_alloc(&PyNodeType, 0);
+        if (!result) return PyErr_NoMemory();
+        result->node = self->nodes[i];
+        return (PyObject*) result;
+    }
+    else if (PySlice_Check(item)) {
+        Py_ssize_t i, j;
+        Py_ssize_t start, stop, step, slicelength;
+        if (PySlice_GetIndicesEx(item, self->n, &start, &stop, &step,
+            &slicelength) == -1) return NULL;
+        if (slicelength == 0) return PyList_New(0);
+        else {
+            PyNode* node;
+            PyObject* result = PyList_New(slicelength);
+            if (!result) return PyErr_NoMemory();
+            for (i = 0, j = start; i < slicelength; i++, j += step) {
+                node = (PyNode*) PyNodeType.tp_alloc(&PyNodeType, 0);
+                if (!node) {
+                    Py_DECREF(result);
+                    return PyErr_NoMemory();
+                }
+                node->node = self->nodes[j];
+                PyList_SET_ITEM(result, i, (PyObject*)node);
+            }
+            return result;
+        }
+    }
+    else {
+        PyErr_Format(PyExc_TypeError,
+            "tree indices must be integers, not %.200s",
+            item->ob_type->tp_name);
+        return NULL;
+    }
+}
+
+static PyMappingMethods PyTree_mapping = {
+    (lenfunc)PyTree_length,       /* mp_length */
+    (binaryfunc)PyTree_subscript, /* mp_subscript */
+};
+
+static char PyTree_scale__doc__[] =
+"mytree.scale()\n"
+"\n"
+"Scale the node distances in the tree such that they are all between zero\n"
+"and one.\n";
+
+static PyObject*
+PyTree_scale(PyTree* self)
+{
+    int i;
+    const int n = self->n;
+    Node* nodes = self->nodes;
+    double maximum = DBL_MIN;
+
+    for (i = 0; i < n; i++) {
+        double distance = nodes[i].distance;
+        if (distance > maximum) maximum = distance;
+    }
+    if (maximum != 0.0)
+        for (i = 0; i < n; i++) nodes[i].distance /= maximum;
+    Py_INCREF(Py_None);
+    return Py_None;
+}
+
+static char PyTree_cut__doc__[] =
+"mytree.cut(nclusters) -> array\n"
+"\n"
+"Divide the elements in a hierarchical clustering result mytree into\n"
+"clusters, and return an array with the number of the cluster to which each\n"
+"element was assigned. The number of clusters is given by nclusters.\n";
+
+static PyObject*
+PyTree_cut(PyTree* self, PyObject* args)
+{
+    int ok = -1;
+    int nclusters;
+    const int n = self->n + 1;
+    Py_buffer indices = {0};
+
+    if (!PyArg_ParseTuple(args, "O&i",
+        index_converter, &indices, &nclusters)) goto exit;
+    if (nclusters < 1) {
+        PyErr_SetString(PyExc_ValueError,
+            "requested number of clusters should be positive");
+        goto exit;
+    }
+    if (nclusters > n) {
+        PyErr_SetString(PyExc_ValueError,
+            "more clusters requested than items available");
+        goto exit;
+    }
+    if (indices.shape[0] != n) {
+        PyErr_SetString(PyExc_RuntimeError,
+            "indices array inconsistent with tree");
+        goto exit;
+    }
+    ok = cuttree(n, self->nodes, nclusters, indices.buf);
+
+exit:
+    index_converter(NULL, &indices);
+    if (ok == -1) return NULL;
+    if (ok == 0) return PyErr_NoMemory();
+    Py_INCREF(Py_None);
+    return Py_None;
+}
+
+static char PyTree_sort__doc__[] =
+"mytree.sort(order) -> array\n"
+"\n"
+"Sort a hierarchical clustering tree by switching the left and right\n"
+"subnode of nodes such that the elements in the left-to-right order of the\n"
+"tree tend to have increasing order values.\n"
+"\n"
+"Return the indices of the elements in the left-to-right order in the\n"
+"hierarchical clustering tree, such that the element with index indices[i]\n"
+"occurs at position i in the dendrogram.\n";
+
+static PyObject*
+PyTree_sort(PyTree* self, PyObject* args)
+{
+    int ok = -1;
+    Py_buffer indices = {0};
+    const int n = self->n;
+    Py_buffer order = {0};
+
+    if (n == 0) {
+        PyErr_SetString(PyExc_ValueError, "tree is empty");
+        return NULL;
+    }
+    if (!PyArg_ParseTuple(args, "O&O&",
+        index_converter, &indices,
+        vector_converter, &order)) goto exit;
+    if (indices.shape[0] != n + 1) {
+        PyErr_SetString(PyExc_RuntimeError,
+            "indices array inconsistent with tree");
+        goto exit;
+    }
+    if (order.shape[0] != n + 1) {
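+        /* a tree with n nodes joins n+1 elements, so order needs one value per element */
+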
PyErr_Format(PyExc_ValueError, + "order array has incorrect size %zd (expected %d)", + order.shape[0], n + 1); + goto exit; + } + ok = sorttree(n, self->nodes, order.buf, indices.buf); +exit: + index_converter(NULL, &indices); + vector_converter(NULL, &order); + if (ok == -1) return NULL; + if (ok == 0) return PyErr_NoMemory(); + Py_INCREF(Py_None); + return Py_None; +} + +static PyMethodDef PyTree_methods[] = { + {"scale", (PyCFunction)PyTree_scale, METH_NOARGS, PyTree_scale__doc__}, + {"cut", (PyCFunction)PyTree_cut, METH_VARARGS, PyTree_cut__doc__}, + {"sort", (PyCFunction)PyTree_sort, METH_VARARGS, PyTree_sort__doc__}, + {NULL} /* Sentinel */ +}; + +static char PyTree_doc[] = +"Tree objects store a hierarchical clustering solution.\n" +"Individual nodes in the tree can be accessed with tree[i], where i is\n" +"an integer. Whereas the tree itself is a read-only object, tree[:]\n" +"returns a list of all the nodes, which can then be modified. To create\n" +"a new Tree from this list, use Tree(list).\n" +"See the description of the Node class for more information."; + +static PyTypeObject PyTreeType = { + PyVarObject_HEAD_INIT(NULL, 0) + "_cluster.Tree", /* tp_name */ + sizeof(PyTree), /* tp_basicsize */ + 0, /* tp_itemsize */ + (destructor)PyTree_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + &PyTree_mapping, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + (reprfunc)PyTree_str, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ + PyTree_doc, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + PyTree_methods, /* tp_methods */ + NULL, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + (newfunc)PyTree_new, /* tp_new */ +}; + +/* ========================================================================= */ +/* -- Methods -------------------------------------------------------------- */ +/* ========================================================================= */ + +/* version */ +static char version__doc__[] = +"version() -> string\n" +"\n" +"Return the version number of the C Clustering Library as a string.\n"; + +static PyObject* +py_version(PyObject* self) +{ + return PyUnicode_FromString( CLUSTERVERSION ); +} + +/* kcluster */ +static char kcluster__doc__[] = +"kcluster(data, nclusters, mask, weight, transpose, npass, method,\n" +" dist, clusterid) -> None\n" +"\n" +"This function implements k-means clustering.\n" +"\n" +"Arguments:\n" +"\n" +" - data: nrows x ncols array containing the data to be clustered\n" +"\n" +" - nclusters: number of clusters (the 'k' in k-means)\n" +"\n" +" - mask: nrows x ncols array of integers, showing which data are\n" +" missing. If mask[i,j] == 0, then data[i,j] is missing.\n" +"\n" +" - weight: the weights to be used when calculating distances\n" +" - transpose:\n" +"\n" +" - if equal to 0, rows are clustered;\n" +" - if equal to 1, columns are clustered.\n" +"\n" +" - npass: number of times the k-means clustering algorithm is\n" +" performed, each time with a different (random) initial\n" +" condition. 
If npass == 0, then the assignments in clusterid\n" +" are used as the initial condition.\n" +"\n" +" - method: specifies how the center of a cluster is found:\n" +"\n" +" - method == 'a': arithmetic mean\n" +" - method == 'm': median\n" +"\n" +" - dist: specifies the distance function to be used:\n" +"\n" +" - dist == 'e': Euclidean distance\n" +" - dist == 'b': City Block distance\n" +" - dist == 'c': Pearson correlation\n" +" - dist == 'a': absolute value of the correlation\n" +" - dist == 'u': uncentered correlation\n" +" - dist == 'x': absolute uncentered correlation\n" +" - dist == 's': Spearman's rank correlation\n" +" - dist == 'k': Kendall's tau\n" +"\n" +" - clusterid: array in which the final clustering solution will be\n" +" stored (output variable). If npass == 0, then clusterid is also used\n" +" as an input variable, containing the initial condition from which\n" +" the EM algorithm should start. In this case, the k-means algorithm\n" +" is fully deterministic.\n" +"\n"; + +static PyObject* +py_kcluster(PyObject* self, PyObject* args, PyObject* keywords) +{ + int nclusters = 2; + int nrows, ncols; + int nitems; + int ndata; + Data data = {0}; + Mask mask = {0}; + Py_buffer weight = {0}; + int transpose = 0; + int npass = 1; + char method = 'a'; + char dist = 'e'; + Py_buffer clusterid = {0}; + double error; + int ifound = 0; + + static char* kwlist[] = {"data", + "nclusters", + "mask", + "weight", + "transpose", + "npass", + "method", + "dist", + "clusterid", + NULL}; + + if (!PyArg_ParseTupleAndKeywords(args, keywords, "O&iO&O&iiO&O&O&", kwlist, + data_converter, &data, + &nclusters, + mask_converter, &mask, + vector_converter, &weight, + &transpose, + &npass, + method_kcluster_converter, &method, + distance_converter, &dist, + index_converter, &clusterid)) return NULL; + if (!data.values) { + PyErr_SetString(PyExc_RuntimeError, "data is None"); + goto exit; + } + if (!mask.values) { + PyErr_SetString(PyExc_RuntimeError, "mask is None"); + goto exit; + } + if (data.nrows != mask.view.shape[0] || + data.ncols != mask.view.shape[1]) { + PyErr_Format(PyExc_ValueError, + "mask has incorrect dimensions %zd x %zd (expected %d x %d)", + mask.view.shape[0], mask.view.shape[1], data.nrows, data.ncols); + goto exit; + } + nrows = data.nrows; + ncols = data.ncols; + ndata = transpose ? nrows : ncols; + nitems = transpose ? 
ncols : nrows; + if (weight.shape[0] != ndata) { + PyErr_Format(PyExc_ValueError, + "weight has incorrect size %zd (expected %d)", + weight.shape[0], ndata); + goto exit; + } + if (nclusters < 1) { + PyErr_SetString(PyExc_ValueError, "nclusters should be positive"); + goto exit; + } + if (nitems < nclusters) { + PyErr_SetString(PyExc_ValueError, + "more clusters than items to be clustered"); + goto exit; + } + if (npass < 0) { + PyErr_SetString(PyExc_RuntimeError, "expected a non-negative integer"); + goto exit; + } + else if (npass == 0) { + int n = check_clusterid(clusterid, nitems); + if (n == 0) goto exit; + if (n != nclusters) { + PyErr_SetString(PyExc_ValueError, + "more clusters requested than found in clusterid"); + goto exit; + } + } + kcluster(nclusters, + nrows, + ncols, + data.values, + mask.values, + weight.buf, + transpose, + npass, + method, + dist, + clusterid.buf, + &error, + &ifound); +exit: + data_converter(NULL, &data); + mask_converter(NULL, &mask); + vector_converter(NULL, &weight); + index_converter(NULL, &clusterid); + if (ifound) return Py_BuildValue("di", error, ifound); + return NULL; +} +/* end of wrapper for kcluster */ + +/* kmedoids */ +static char kmedoids__doc__[] = +"kmedoids(distance, nclusters, npass, clusterid) -> error, nfound\n" +"\n" +"This function implements k-medoids clustering.\n" +"\n" +"Arguments:\n" +" - distance: The distance matrix between the elements. There are three\n" +" ways in which you can pass a distance matrix:\n" +"\n" +" 1. a 2D Numerical Python array (in which only the left-lower\n" +" part of the array will be accessed);\n" +" 2. a 1D Numerical Python array containing the distances\n" +" consecutively;\n" +" 3. a list of rows containing the lower-triangular part of\n" +" the distance matrix.\n" +"\n" +" Examples are:\n" +"\n" +" >>> from numpy import array\n" +" >>> distance = array([[0.0, 1.1, 2.3],\n" +" ... [1.1, 0.0, 4.5],\n" +" ... [2.3, 4.5, 0.0]])\n" +" >>> # (option #1)\n" +" >>> distance = array([1.1, 2.3, 4.5])\n" +" >>> # (option #2)\n" +" >>> distance = [array([]),\n" +" ... array([1.1]),\n" +" ... array([2.3, 4.5])]\n" +" >>> # (option #3)\n" +"\n" +" These three correspond to the same distance matrix.\n" +"\n" +" - nclusters: number of clusters (the 'k' in k-medoids)\n" +"\n" +" - npass: number of times the k-medoids clustering algorithm is\n" +" performed, each time with a different (random) initial\n" +" condition. If npass == 0, then the assignments in clusterid\n" +" are used as the initial condition.\n" +"\n" +" - clusterid: array in which the final clustering solution will be\n" +" stored (output variable). If npass == 0, then clusterid is also used\n" +" as an input variable, containing the initial condition from which\n" +" the EM algorithm should start. 
In this case, the k-medoids algorithm\n"
+"   is fully deterministic.\n"
+"\n"
+"Return values:\n"
+" - error: the within-cluster sum of distances for the returned k-medoids\n"
+"   clustering solution;\n"
+" - nfound: the number of times this solution was found.\n";
+
+static PyObject*
+py_kmedoids(PyObject* self, PyObject* args, PyObject* keywords)
+{
+    int nclusters = 2;
+    Distancematrix distances = {0};
+    Py_buffer clusterid = {0};
+    int npass = 1;
+    double error;
+    int ifound = -2;
+
+    static char* kwlist[] = {"distance",
+                             "nclusters",
+                             "npass",
+                             "clusterid",
+                             NULL};
+
+    if (!PyArg_ParseTupleAndKeywords(args, keywords, "O&iiO&", kwlist,
+                                     distancematrix_converter, &distances,
+                                     &nclusters,
+                                     &npass,
+                                     index_converter, &clusterid)) return NULL;
+    if (npass < 0) {
+        PyErr_SetString(PyExc_RuntimeError, "expected a non-negative integer");
+        goto exit;
+    }
+    else if (npass == 0) {
+        int n = check_clusterid(clusterid, distances.n);
+        if (n == 0) goto exit;
+        if (n != nclusters) {
+            PyErr_SetString(PyExc_RuntimeError,
+                "more clusters requested than found in clusterid");
+            goto exit;
+        }
+    }
+    if (nclusters <= 0) {
+        PyErr_SetString(PyExc_ValueError,
+            "nclusters should be a positive integer");
+        goto exit;
+    }
+    if (distances.n < nclusters) {
+        PyErr_SetString(PyExc_ValueError,
+            "more clusters requested than items to be clustered");
+        goto exit;
+    }
+    kmedoids(nclusters,
+             distances.n,
+             distances.values,
+             npass,
+             clusterid.buf,
+             &error,
+             &ifound);
+
+exit:
+    distancematrix_converter(NULL, &distances);
+    index_converter(NULL, &clusterid);
+    switch (ifound) {
+        case -2:
+            return NULL;
+        case -1:
+            return PyErr_NoMemory();
+        case 0: /* should not occur */
+            PyErr_SetString(PyExc_RuntimeError,
+                "error in kmedoids input arguments");
+            return NULL;
+        default:
+            return Py_BuildValue("di", error, ifound);
+    }
+}
+/* end of wrapper for kmedoids */
+
+/* treecluster */
+static char treecluster__doc__[] =
+"treecluster(tree, data, mask, weight, transpose, dist, method,\n"
+"            distancematrix) -> None\n"
+"\n"
+"This function implements the pairwise single, complete, centroid, and\n"
+"average linkage hierarchical clustering methods.\n"
+"\n"
+"Arguments:\n"
+" - tree: an empty Tree object; its nodes will be filled by treecluster\n"
+"   to describe the hierarchical clustering result. See the description\n"
+"   of the Tree class for more information.\n"
+"\n"
+" - data: nrows x ncols array containing the data to be clustered.\n"
+"   Either data or distancematrix (see below) should be None.\n"
+"\n"
+" - mask: nrows x ncols array of integers, showing which data are\n"
+"   missing. 
If mask[i,j]==0, then data[i,j] is missing.\n" +"\n" +" - weight: the weights to be used when calculating distances.\n" +"\n" +" - transpose:\n" +"\n" +" - if equal to 0, rows are clustered;\n" +" - if equal to 1, columns are clustered.\n" +"\n" +" - dist: specifies the distance function to be used:\n" +"\n" +" - dist == 'e': Euclidean distance\n" +" - dist == 'b': City Block distance\n" +" - dist == 'c': Pearson correlation\n" +" - dist == 'a': absolute value of the correlation\n" +" - dist == 'u': uncentered correlation\n" +" - dist == 'x': absolute uncentered correlation\n" +" - dist == 's': Spearman's rank correlation\n" +" - dist == 'k': Kendall's tau\n" +"\n" +" - method: specifies which linkage method is used:\n" +"\n" +" - method == 's': Single pairwise linkage\n" +" - method == 'm': Complete (maximum) pairwise linkage (default)\n" +" - method == 'c': Centroid linkage\n" +" - method == 'a': Average pairwise linkage\n" +"\n" +" - distancematrix: The distance matrix between the elements.\n" +" Either data (see above) or distancematrix should be None.\n" +" There are three ways in which you can pass a distance matrix:\n" +"\n" +" 1. a 2D Numerical Python array (in which only the left-lower\n" +" part of the array will be accessed);\n" +" 2. a 1D Numerical Python array containing the distances\n" +" consecutively;\n" +" 3. a list of rows containing the lower-triangular part of\n" +" the distance matrix.\n" +"\n" +" Examples are:\n" +"\n" +" >>> from numpy import array\n" +" >>> distance = array([[0.0, 1.1, 2.3],\n" +" ... [1.1, 0.0, 4.5],\n" +" ... [2.3, 4.5, 0.0]])\n" +" >>> # option 1.\n" +" >>> distance = array([1.1, 2.3, 4.5])\n" +" >>> # option 2.\n" +" >>> distance = [array([]),\n" +" ... array([1.1]),\n" +" ... array([2.3, 4.5])]\n" +" >>> # option 3.\n" +"\n" +" These three correspond to the same distance matrix.\n" +"\n" +" PLEASE NOTE:\n" +" As the treecluster routine may shuffle the values in the\n" +" distance matrix as part of the clustering algorithm, be sure\n" +" to save this array in a different variable before calling\n" +" treecluster if you need it later.\n" +"\n" +"Either data or distancematrix should be None. If distancematrix is None,\n" +"the hierarchical clustering solution is calculated from the values in\n" +"the argument data. 
Instead if data is None, the hierarchical clustering\n" +"solution is calculated from the distance matrix.\n" +"Pairwise centroid-linkage clustering can be calculated only from the data\n" +"and not from the distance matrix.\n" +"Pairwise single-, maximum-, and average-linkage clustering can be\n" +"calculated from either the data or from the distance matrix.\n"; + +static PyObject* +py_treecluster(PyObject* self, PyObject* args, PyObject* keywords) +{ + Data data = {0}; + Mask mask = {0}; + Py_buffer weight = {0}; + int transpose = 0; + char dist = 'e'; + char method = 'm'; + Distancematrix distances = {0}; + PyTree* tree = NULL; + Node* nodes; + int nitems; + + static char* kwlist[] = {"tree", + "data", + "mask", + "weight", + "transpose", + "method", + "dist", + "distancematrix", + NULL }; + + if (!PyArg_ParseTupleAndKeywords(args, keywords, "O!O&O&O&iO&O&O&", kwlist, + &PyTreeType, &tree, + data_converter, &data, + mask_converter, &mask, + vector_none_converter, &weight, + &transpose, + method_treecluster_converter, &method, + distance_converter, &dist, + distancematrix_converter, &distances)) + return NULL; + + if (tree->n != 0) { + PyErr_SetString(PyExc_RuntimeError, "expected an empty tree"); + goto exit; + } + if (data.values != NULL && distances.values != NULL) { + PyErr_SetString(PyExc_ValueError, + "use either data or distancematrix, do not use both"); + goto exit; + } + if (data.values == NULL && distances.values == NULL) { + PyErr_SetString(PyExc_ValueError, + "neither data nor distancematrix was given"); + goto exit; + } + + if (data.values) /* use the values in data, not the distance matrix */ { + int nrows; + int ncols; + int ndata; + + if (!mask.values) { + PyErr_SetString(PyExc_RuntimeError, "mask is None"); + goto exit; + } + if (!weight.buf) { + PyErr_SetString(PyExc_RuntimeError, "weight is None"); + goto exit; + } + nrows = data.nrows; + ncols = data.ncols; + if (nrows != mask.view.shape[0] || ncols != mask.view.shape[1]) { + PyErr_Format(PyExc_ValueError, + "mask has incorrect dimensions (%zd x %zd, expected %d x %d)", + mask.view.shape[0], mask.view.shape[1], + data.nrows, data.ncols); + goto exit; + } + ndata = transpose ? nrows : ncols; + nitems = transpose ? 
ncols : nrows;
+        if (weight.shape[0] != ndata) {
+            PyErr_Format(PyExc_RuntimeError,
+                "weight has incorrect size %zd (expected %d)",
+                weight.shape[0], ndata);
+            goto exit;
+        }
+
+        nodes = treecluster(nrows,
+                            ncols,
+                            data.values,
+                            mask.values,
+                            weight.buf,
+                            transpose,
+                            dist,
+                            method,
+                            NULL);
+    }
+    else { /* use the distance matrix instead of the values in data */
+        if (!strchr("sma", method)) {
+            PyErr_SetString(PyExc_ValueError,
+                "argument method should be 's', 'm', or 'a' "
+                "when specifying the distance matrix");
+            goto exit;
+        }
+        nitems = distances.n;
+        nodes = treecluster(nitems,
+                            nitems,
+                            0,
+                            0,
+                            0,
+                            transpose,
+                            dist,
+                            method,
+                            distances.values);
+    }
+
+    if (!nodes) {
+        PyErr_NoMemory();
+        goto exit;
+    }
+    tree->nodes = nodes;
+    tree->n = nitems-1;
+
+exit:
+    data_converter(NULL, &data);
+    mask_converter(NULL, &mask);
+    vector_none_converter(NULL, &weight);
+    distancematrix_converter(NULL, &distances);
+    if (tree == NULL || tree->n == 0) return NULL;
+    Py_INCREF(Py_None);
+    return Py_None;
+}
+/* end of wrapper for treecluster */
+
+/* somcluster */
+static char somcluster__doc__[] =
+"somcluster(clusterid, celldata, data, mask, weight, transpose,\n"
+"           inittau, niter, dist) -> None\n"
+"\n"
+"This function implements a self-organizing map on a rectangular grid.\n"
+"\n"
+"Arguments:\n"
+" - clusterid: array with two columns, with the number of rows equal\n"
+"   to the number of items being clustered. Upon return, each row\n"
+"   in the array contains the x and y coordinates of the cell in the\n"
+"   rectangular SOM grid to which the item was assigned.\n"
+"\n"
+" - celldata: array with dimensions nxgrid x nygrid x number of columns\n"
+"   if rows are being clustered, or nxgrid x nygrid x number of rows\n"
+"   if columns are being clustered, where nxgrid is the horizontal\n"
+"   dimension of the rectangular SOM map and nygrid is the vertical\n"
+"   dimension of the rectangular SOM map.\n"
+"   Upon return, each element [ix, iy] of this array contains the\n"
+"   data for the centroid of the cluster in the SOM grid cell with\n"
+"   coordinates [ix, iy].\n"
+"\n"
+" - data: nrows x ncols array containing the data to be clustered.\n"
+"\n"
+" - mask: nrows x ncols array of integers, showing which data are\n"
+"   missing. If mask[i,j] == 0, then data[i,j] is missing.\n"
+"\n"
+" - weight: the weights to be used when calculating distances\n"
+"\n"
+" - transpose:\n"
+"\n"
+"   - if equal to 0, rows are clustered;\n"
+"   - if equal to 1, columns are clustered.\n"
+"\n"
+" - inittau: the initial value of tau (the neighborhood function)\n"
+"\n"
+" - niter: the number of iterations\n"
+"\n"
+" - dist: specifies the distance function to be used:\n"
+"\n"
+"   - dist == 'e': Euclidean distance\n"
+"   - dist == 'b': City Block distance\n"
+"   - dist == 'c': Pearson correlation\n"
+"   - dist == 'a': absolute value of the correlation\n"
+"   - dist == 'u': uncentered correlation\n"
+"   - dist == 'x': absolute uncentered correlation\n"
+"   - dist == 's': Spearman's rank correlation\n"
+"   - dist == 'k': Kendall's tau\n";
+
+static PyObject*
+py_somcluster(PyObject* self, PyObject* args, PyObject* keywords)
+{
+    int nrows;
+    int ncols;
+    int ndata;
+    Data data = {0};
+    Mask mask = {0};
+    Py_buffer weight = {0};
+    int transpose = 0;
+    double inittau = 0.02;
+    int niter = 1;
+    char dist = 'e';
+    Py_buffer indices = {0};
+    Celldata celldata = {0};
+    PyObject* result = NULL;
+
+    static char* kwlist[] = {"clusterids",
+                             "celldata",
+                             "data",
+                             "mask",
+                             "weight",
+                             "transpose",
+                             "inittau",
+                             "niter",
+                             "dist",
+                             NULL};
+
+    if (!PyArg_ParseTupleAndKeywords(args, keywords, "O&O&O&O&O&idiO&", kwlist,
+                                     index2d_converter, &indices,
+                                     celldata_converter, &celldata,
+                                     data_converter, &data,
+                                     mask_converter, &mask,
+                                     vector_converter, &weight,
+                                     &transpose,
+                                     &inittau,
+                                     &niter,
+                                     distance_converter, &dist)) return NULL;
+    if (niter < 1) {
+        PyErr_SetString(PyExc_ValueError,
+            "number of iterations (niter) should be positive");
+        goto exit;
+    }
+    if (!data.values) {
+        PyErr_SetString(PyExc_RuntimeError, "data is None");
+        goto exit;
+    }
+    if (!mask.values) {
+        PyErr_SetString(PyExc_RuntimeError, "mask is None");
+        goto exit;
+    }
+    nrows = data.nrows;
+    ncols = data.ncols;
+    if (nrows != mask.view.shape[0] || ncols != mask.view.shape[1]) {
+        PyErr_Format(PyExc_ValueError,
+            "mask has incorrect dimensions (%zd x %zd, expected %d x %d)",
+            mask.view.shape[0], mask.view.shape[1], data.nrows, data.ncols);
+        goto exit;
+    }
+    ndata = transpose ? nrows : ncols;
+    if (weight.shape[0] != ndata) {
+        PyErr_Format(PyExc_RuntimeError,
+            "weight has incorrect size %zd (expected %d)",
+            weight.shape[0], ndata);
+        goto exit;
+    }
+    if (celldata.nz != ndata) {
+        PyErr_Format(PyExc_RuntimeError,
+            "the celldata array size is not consistent with the data "
+            "(last dimension is %d; expected %d)", celldata.nz, ndata);
+        goto exit;
+    }
+    somcluster(nrows,
+               ncols,
+               data.values,
+               mask.values,
+               weight.buf,
+               transpose,
+               celldata.nx,
+               celldata.ny,
+               inittau,
+               niter,
+               dist,
+               celldata.values,
+               indices.buf);
+    Py_INCREF(Py_None);
+    result = Py_None;
+
+exit:
+    data_converter(NULL, &data);
+    vector_converter(NULL, &weight);
+    index2d_converter(NULL, &indices);
+    celldata_converter(NULL, &celldata);
+    return result;
+}
+/* end of wrapper for somcluster */
+
+/* clusterdistance */
+static char clusterdistance__doc__[] =
+"clusterdistance(data, mask, weight, index1, index2, dist, method,\n"
+"                transpose) -> distance between two clusters\n"
+"\n"
+"Arguments:\n"
+"\n"
+" - data: nrows x ncols array containing the data values.\n"
+"\n"
+" - mask: nrows x ncols array of integers, showing which data are\n"
+"   missing. 
If mask[i,j] == 0, then data[i,j] is missing.\n" +"\n" +" - weight: the weights to be used when calculating distances\n" +"\n" +" - index1: 1D array identifying which items belong to the first\n" +" cluster.\n" +"\n" +" - index2: 1D array identifying which items belong to the second\n" +" cluster.\n" +"\n" +" - dist: specifies the distance function to be used:\n" +"\n" +" - dist == 'e': Euclidean distance\n" +" - dist == 'b': City Block distance\n" +" - dist == 'c': Pearson correlation\n" +" - dist == 'a': absolute value of the correlation\n" +" - dist == 'u': uncentered correlation\n" +" - dist == 'x': absolute uncentered correlation\n" +" - dist == 's': Spearman's rank correlation\n" +" - dist == 'k': Kendall's tau\n" +"\n" +" - method: specifies how the distance between two clusters is defined:\n" +"\n" +" - method == 'a': the distance between the arithmetic means of the\n" +" two clusters\n" +" - method == 'm': the distance between the medians of the two\n" +" clusters\n" +" - method == 's': the smallest pairwise distance between members\n" +" of the two clusters\n" +" - method == 'x': the largest pairwise distance between members of\n" +" the two clusters\n" +" - method == 'v': average of the pairwise distances between\n" +" members of the clusters\n" +"\n" +" - transpose:\n" +"\n" +" - if equal to 0: clusters of rows are considered;\n" +" - if equal to 1: clusters of columns are considered.\n" +"\n"; + +static PyObject* +py_clusterdistance(PyObject* self, PyObject* args, PyObject* keywords) +{ + double distance; + int nrows; + int ncols; + int ndata; + Data data = {0}; + Mask mask = {0}; + Py_buffer weight = {0}; + char dist = 'e'; + char method = 'a'; + int transpose = 0; + Py_buffer index1 = {0}; + Py_buffer index2 = {0}; + PyObject* result = NULL; + + static char* kwlist[] = {"data", + "mask", + "weight", + "index1", + "index2", + "method", + "dist", + "transpose", + NULL}; + + if (!PyArg_ParseTupleAndKeywords(args, keywords, "O&O&O&O&O&O&O&i", kwlist, + data_converter, &data, + mask_converter, &mask, + vector_converter, &weight, + index_converter, &index1, + index_converter, &index2, + method_clusterdistance_converter, &method, + distance_converter, &dist, + &transpose)) return NULL; + if (!data.values) { + PyErr_SetString(PyExc_RuntimeError, "data is None"); + goto exit; + } + if (!mask.values) { + PyErr_SetString(PyExc_RuntimeError, "mask is None"); + goto exit; + } + nrows = data.nrows; + ncols = data.ncols; + ndata = transpose ? 
nrows : ncols; + if (nrows != mask.view.shape[0] || ncols != mask.view.shape[1]) { + PyErr_Format(PyExc_ValueError, + "mask has incorrect dimensions (%zd x %zd, expected %d x %d)", + mask.view.shape[0], mask.view.shape[1], data.nrows, data.ncols); + goto exit; + } + if (weight.shape[0] != ndata) { + PyErr_Format(PyExc_RuntimeError, + "weight has incorrect size %zd (expected %d)", + weight.shape[0], ndata); + goto exit; + } + + distance = clusterdistance(nrows, + ncols, + data.values, + mask.values, + weight.buf, + (int) index1.shape[0], + (int) index2.shape[0], + index1.buf, + index2.buf, + dist, + method, + transpose); + + if (distance < -0.5) /* Actually -1.0; avoiding roundoff errors */ + PyErr_SetString(PyExc_IndexError, "index out of range"); + else + result = PyFloat_FromDouble(distance); +exit: + data_converter(NULL, &data); + mask_converter(NULL, &mask); + vector_converter(NULL, &weight); + index_converter(NULL, &index1); + index_converter(NULL, &index2); + return result; +} +/* end of wrapper for clusterdistance */ + +/* clustercentroids */ +static char clustercentroids__doc__[] = +"clustercentroids(data, mask, clusterid, method, transpose) -> cdata, cmask\n" +"\n" +"The clustercentroids routine calculates the cluster centroids, given to\n" +"which cluster each element belongs. The centroid is defined as either\n" +"the mean or the median over all elements for each dimension.\n" +"\n" +"Arguments:\n" +" - data: nrows x ncols array containing the data values.\n" +"\n" +" - mask: nrows x ncols array of integers, showing which data are\n" +" missing. If mask[i,j] == 0, then data[i,j] is missing.\n" +"\n" +" - clusterid: array containing the cluster number for each item.\n" +" The cluster number should be non-negative.\n" +"\n" +" - method: specifies whether the centroid is calculated from the\n" +" arithmetic mean (method == 'a', default) or the median\n" +" (method == 'm') over each dimension.\n" +"\n" +" - transpose: if equal to 0, row clusters are considered;\n" +" if equal to 1, column clusters are considered.\n" +"\n" +" - cdata: 2D array containing, upon return, the cluster centroids.\n" +" If transpose == 0, then the dimensions of cdata should be\n" +" nclusters x ncols.\n" +" If transpose == 1, then the dimensions of cdata should be \n" +" nrows x nclusters.\n" +"\n" +" - cmask: 2D array of integers describing, upon return, which elements\n" +" in cdata, if any, are missing.\n"; + +static PyObject* +py_clustercentroids(PyObject* self, PyObject* args, PyObject* keywords) +{ + int nrows; + int ncols; + int nclusters; + Data data = {0}; + Mask mask = {0}; + Data cdata = {0}; + Mask cmask = {0}; + Py_buffer clusterid = {0}; + char method = 'a'; + int transpose = 0; + int ok = -1; + + static char* kwlist[] = {"data", + "mask", + "clusterid", + "method", + "transpose", + "cdata", + "cmask", + NULL }; + + if (!PyArg_ParseTupleAndKeywords(args, keywords, "O&O&O&O&iO&O&", kwlist, + data_converter, &data, + mask_converter, &mask, + index_converter, &clusterid, + method_kcluster_converter, &method, + &transpose, + data_converter, &cdata, + mask_converter, &cmask)) return NULL; + if (!data.values) { + PyErr_SetString(PyExc_RuntimeError, "data is None"); + goto exit; + } + if (!mask.values) { + PyErr_SetString(PyExc_RuntimeError, "mask is None"); + goto exit; + } + nrows = data.nrows; + ncols = data.ncols; + if (nrows != mask.view.shape[0] || ncols != mask.view.shape[1]) { + PyErr_Format(PyExc_ValueError, + "mask has incorrect dimensions (%zd x %zd, expected %d x %d)", + 
mask.view.shape[0], mask.view.shape[1], data.nrows, data.ncols); + goto exit; + } + if (transpose == 0) { + nclusters = check_clusterid(clusterid, nrows); + nrows = nclusters; + } + else { + nclusters = check_clusterid(clusterid, ncols); + ncols = nclusters; + } + if (nclusters == 0) goto exit; + if (cdata.nrows != nrows) { + PyErr_Format(PyExc_RuntimeError, + "cdata has incorrect number of rows (%d, expected %d)", + cdata.nrows, nrows); + goto exit; + } + if (cdata.ncols != ncols) { + PyErr_Format(PyExc_RuntimeError, + "cdata has incorrect number of columns (%d, expected %d)", + cdata.ncols, ncols); + goto exit; + } + if (cmask.view.shape[0] != nrows) { + PyErr_Format(PyExc_RuntimeError, + "cmask has incorrect number of rows (%zd, expected %d)", + cmask.view.shape[0], nrows); + goto exit; + } + if (cmask.view.shape[1] != ncols) { + PyErr_Format(PyExc_RuntimeError, + "cmask has incorrect number of columns " + "(%zd, expected %d)", cmask.view.shape[1], ncols); + goto exit; + } + ok = getclustercentroids(nclusters, + data.nrows, + data.ncols, + data.values, + mask.values, + clusterid.buf, + cdata.values, + cmask.values, + transpose, + method); +exit: + data_converter(NULL, &data); + mask_converter(NULL, &mask); + data_converter(NULL, &cdata); + mask_converter(NULL, &cmask); + index_converter(NULL, &clusterid); + if (ok == -1) return NULL; + if (ok == 0) return PyErr_NoMemory(); + Py_INCREF(Py_None); + return Py_None; +} +/* end of wrapper for clustercentroids */ + +/* distancematrix */ +static char distancematrix__doc__[] = +"distancematrix(data, mask, weight, transpose, dist, distancematrix)\n" +" -> None\n" +"\n" +"This function calculates the distance matrix between the data values.\n" +"\n" +"Arguments:\n" +"\n" +" - data: nrows x ncols array containing the data values.\n" +"\n" +" - mask: nrows x ncols array of integers, showing which data are\n" +" missing. If mask[i,j] == 0, then data[i,j] is missing.\n" +"\n" +" - weight: the weights to be used when calculating distances.\n" +"\n" +" - transpose: if equal to 0, the distances between rows are\n" +" calculated;\n" +" if equal to 1, the distances between columns are calculated.\n" +"\n" +" - dist: specifies the distance function to be used:\n" +"\n" +" - dist == 'e': Euclidean distance\n" +" - dist == 'b': City Block distance\n" +" - dist == 'c': Pearson correlation\n" +" - dist == 'a': absolute value of the correlation\n" +" - dist == 'u': uncentered correlation\n" +" - dist == 'x': absolute uncentered correlation\n" +" - dist == 's': Spearman's rank correlation\n" +" - dist == 'k': Kendall's tau\n" +"\n" +" - distancematrix: Upon return, the distance matrix as a list of 1D\n" +" arrays. 
The number of columns in each row is equal to the row number\n" +" (i.e., len(distancematrix[i]) == i).\n" +" An example of the return value is:\n" +"\n" +" matrix = [[],\n" +" array([1.]),\n" +" array([7., 3.]),\n" +" array([4., 2., 6.])]\n" +"\n" +"This corresponds to the distance matrix:\n" +"\n" +" [0.\t1.\t7.\t4.]\n" +" [1.\t0.\t3.\t2.]\n" +" [7.\t3.\t0.\t6.]\n" +" [4.\t2.\t6.\t0.]\n"; + +static PyObject* +py_distancematrix(PyObject* self, PyObject* args, PyObject* keywords) +{ + PyObject* list; + Distancematrix distances = {0}; + Data data = {0}; + Mask mask = {0}; + Py_buffer weight = {0}; + int transpose = 0; + char dist = 'e'; + int nrows, ncols, ndata; + PyObject* result = NULL; + + /* -- Read the input variables --------------------------------------- */ + static char* kwlist[] = {"data", + "mask", + "weight", + "transpose", + "dist", + "distancematrix", + NULL}; + + if (!PyArg_ParseTupleAndKeywords(args, keywords, "O&O&O&iO&O!", kwlist, + data_converter, &data, + mask_converter, &mask, + vector_converter, &weight, + &transpose, + distance_converter, &dist, + &PyList_Type, &list)) return NULL; + if (!data.values) { + PyErr_SetString(PyExc_RuntimeError, "data is None"); + goto exit; + } + if (!mask.values) { + PyErr_SetString(PyExc_RuntimeError, "mask is None"); + goto exit; + } + nrows = data.nrows; + ncols = data.ncols; + if (nrows != mask.view.shape[0] || ncols != mask.view.shape[1]) { + PyErr_Format(PyExc_ValueError, + "mask has incorrect dimensions (%zd x %zd, expected %d x %d)", + mask.view.shape[0], mask.view.shape[1], data.nrows, data.ncols); + goto exit; + } + ndata = (transpose == 0) ? ncols : nrows; + if (weight.shape[0] != ndata) { + PyErr_Format(PyExc_ValueError, + "weight has incorrect size %zd (expected %d)", + weight.shape[0], ndata); + goto exit; + } + if (_convert_list_to_distancematrix(list, &distances) == 0) goto exit; + + distancematrix(nrows, + ncols, + data.values, + mask.values, + weight.buf, + dist, + transpose, + distances.values); + + Py_INCREF(Py_None); + result = Py_None; +exit: + data_converter(NULL, &data); + mask_converter(NULL, &mask); + vector_converter(NULL, &weight); + distancematrix_converter(NULL, &distances); + return result; +} +/* end of wrapper for distancematrix */ + +/* pca */ +static char pca__doc__[] = +"pca(data, columnmean, coordinates, pc, eigenvalues) -> None\n" +"\n" +"This function calculates the principal component decomposition\n" +"of the values in data.\n" +"\n" +"Arguments:\n" +"\n" +" - data: nrows x ncols array containing the data values.\n" +"\n" +" - columnmean: array of size ncols in which the mean of each column\n" +" will be stored.\n" +"\n" +" - coordinates: nrows x nmin array in which the coordinates of the\n" +" data along the principal components will be stored;\n" +" nmin is min(nrows, ncols).\n" +"\n" +" - pc: the principal components as an nmin x ncols array, where nmin\n" +" is min(nrows, ncols).\n" +"\n" +" - eigenvalues: array of size min(nrows, ncols), in which the\n" +" eigenvalues will be stored, sorted by the magnitude\n" +" of the eigenvalues, with the largest eigenvalues\n" +" appearing first.\n" +"\n" +"Adding the column means to the dot product of the coordinates and the\n" +"principal components, i.e.\n" +"\n" +" columnmean + dot(coordinates, pc)\n" +"\n" +"recreates the data matrix.\n"; + +static PyObject* +py_pca(PyObject* self, PyObject* args) +{ + Py_buffer eigenvalues = {0}; + double** u; + double** v; + Data data = {0}; + Data pc = {0}; + Data coordinates = {0}; + Py_buffer mean = {0}; + 
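+ /* u must point to an nrows x ncols buffer: the mean-centered data are + written into u[i][j] below before pca() is called. Depending on the + shape of the input, that buffer is coordinates (when nrows >= ncols) + or pc (when nrows < ncols), with v pointing to the other array. */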
int nrows, ncols; + int nmin; + int error = -2; + double* p; + double** values; + int i, j; + + if (!PyArg_ParseTuple(args, "O&O&O&O&O&", + data_converter, &data, + vector_converter, &mean, + data_converter, &coordinates, + data_converter, &pc, + vector_converter, &eigenvalues)) return NULL; + + values = data.values; + if (!values) { + PyErr_SetString(PyExc_RuntimeError, "data is None"); + goto exit; + } + nrows = data.nrows; + ncols = data.ncols; + if (mean.shape[0] != ncols) { + PyErr_Format(PyExc_RuntimeError, + "columnmean has inconsistent size %zd (expected %d)", + mean.shape[0], ncols); + goto exit; + } + nmin = nrows < ncols ? nrows : ncols; + if (pc.nrows != nmin || pc.ncols != ncols) { + PyErr_Format(PyExc_RuntimeError, + "pc has inconsistent size %d x %d (expected %d x %d)", + pc.nrows, pc.ncols, nmin, ncols); + goto exit; + } + if (coordinates.nrows != nrows || coordinates.ncols != nmin) { + PyErr_Format(PyExc_RuntimeError, + "coordinates has inconsistent size %d x %d (expected %d x %d)", + coordinates.nrows, coordinates.ncols, nrows, nmin); + goto exit; + } + if (nrows >= ncols) { + u = coordinates.values; + v = pc.values; + } + else { /* nrows < ncols */ + u = pc.values; + v = coordinates.values; + } + /* -- Calculate the mean of each column ------------------------------ */ + p = mean.buf; + for (j = 0; j < ncols; j++) { + p[j] = 0.0; + for (i = 0; i < nrows; i++) p[j] += values[i][j]; + p[j] /= nrows; + } + /* -- Subtract the mean of each column ----------------------------- */ + for (i = 0; i < nrows; i++) + for (j = 0; j < ncols; j++) + u[i][j] = values[i][j] - p[j]; + /* -- Perform the principal component analysis ----------------------- */ + error = pca(nrows, ncols, u, v, eigenvalues.buf); + /* ------------------------------------------------------------------- */ +exit: + data_converter(NULL, &data); + vector_converter(NULL, &mean); + data_converter(NULL, &pc); + data_converter(NULL, &coordinates); + vector_converter(NULL, &eigenvalues); + if (error == 0) { + Py_INCREF(Py_None); + return Py_None; + } + if (error == -1) return PyErr_NoMemory(); + else if (error > 0) + PyErr_SetString(PyExc_RuntimeError, + "Singular value decomposition failed to converge"); + return NULL; +} +/* end of wrapper for pca */ + +/* ========================================================================= */ +/* -- The methods table ---------------------------------------------------- */ +/* ========================================================================= */ + + +static struct PyMethodDef cluster_methods[] = { + {"version", (PyCFunction) py_version, METH_NOARGS, version__doc__}, + {"kcluster", + (PyCFunction) py_kcluster, + METH_VARARGS | METH_KEYWORDS, + kcluster__doc__ + }, + {"kmedoids", + (PyCFunction) py_kmedoids, + METH_VARARGS | METH_KEYWORDS, + kmedoids__doc__ + }, + {"treecluster", + (PyCFunction) py_treecluster, + METH_VARARGS | METH_KEYWORDS, + treecluster__doc__ + }, + {"somcluster", + (PyCFunction) py_somcluster, + METH_VARARGS | METH_KEYWORDS, + somcluster__doc__ + }, + {"clusterdistance", + (PyCFunction) py_clusterdistance, + METH_VARARGS | METH_KEYWORDS, + clusterdistance__doc__ + }, + {"clustercentroids", + (PyCFunction) py_clustercentroids, + METH_VARARGS | METH_KEYWORDS, + clustercentroids__doc__ + }, + {"distancematrix", + (PyCFunction) py_distancematrix, + METH_VARARGS | METH_KEYWORDS, + distancematrix__doc__ + }, + {"pca", + (PyCFunction) py_pca, + METH_VARARGS, /* py_pca takes no keyword arguments */ + pca__doc__ + }, + {NULL, NULL, 0, NULL} /* sentinel */ +}; + +/* 
========================================================================= */ +/* -- Initialization ------------------------------------------------------- */ +/* ========================================================================= */ + +static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, + "_cluster", + "C Clustering Library", + -1, + cluster_methods, + NULL, + NULL, + NULL, + NULL +}; + +PyMODINIT_FUNC +PyInit__cluster(void) +{ + PyObject *module; + + PyNodeType.tp_new = PyType_GenericNew; + if (PyType_Ready(&PyNodeType) < 0) + return NULL; + if (PyType_Ready(&PyTreeType) < 0) + return NULL; + + module = PyModule_Create(&moduledef); + if (module == NULL) return NULL; + + Py_INCREF(&PyTreeType); + if (PyModule_AddObject(module, "Tree", (PyObject*) &PyTreeType) < 0) { + Py_DECREF(module); + Py_DECREF(&PyTreeType); + return NULL; + } + + Py_INCREF(&PyNodeType); + if (PyModule_AddObject(module, "Node", (PyObject*) &PyNodeType) < 0) { + Py_DECREF(module); + Py_DECREF(&PyNodeType); + return NULL; + } + + return module; +} diff --git a/code/lib/Bio/Compass/__init__.py b/code/lib/Bio/Compass/__init__.py new file mode 100644 index 0000000..3d5e37a --- /dev/null +++ b/code/lib/Bio/Compass/__init__.py @@ -0,0 +1,223 @@ +# Copyright 2004 by James Casbon. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. + +"""Code to deal with output from COMPASS, a program for profile/profile comparison. + +Compass is described in: + +Sadreyev R, Grishin N. COMPASS: a tool for comparison of multiple protein +alignments with assessment of statistical significance. J Mol Biol. 2003 Feb +7;326(1):317-36. + +Tested with COMPASS 1.24. 
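+ +A minimal usage sketch (the file name below is hypothetical, for +illustration only): + + from Bio import Compass + + with open("my_compass_output.txt") as handle: + for record in Compass.parse(handle): + print(record.query, record.hit, record.evalue)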
+""" + +import re + + +def read(handle): + """Read a COMPASS file containing one COMPASS record.""" + record = None + try: + line = next(handle) + record = Record() + __read_names(record, line) + line = next(handle) + __read_threshold(record, line) + line = next(handle) + __read_lengths(record, line) + line = next(handle) + __read_profilewidth(record, line) + line = next(handle) + __read_scores(record, line) + except StopIteration: + if not record: + raise ValueError("No record found in handle") from None + else: + raise ValueError("Unexpected end of stream.") from None + for line in handle: + if not line.strip(): # skip empty lines + continue + __read_query_alignment(record, line) + try: + line = next(handle) + __read_positive_alignment(record, line) + line = next(handle) + __read_hit_alignment(record, line) + except StopIteration: + raise ValueError("Unexpected end of stream.") from None + return record + + +def parse(handle): + """Iterate over records in a COMPASS file.""" + record = None + try: + line = next(handle) + except StopIteration: + return + while True: + try: + record = Record() + __read_names(record, line) + line = next(handle) + __read_threshold(record, line) + line = next(handle) + __read_lengths(record, line) + line = next(handle) + __read_profilewidth(record, line) + line = next(handle) + __read_scores(record, line) + except StopIteration: + raise ValueError("Unexpected end of stream.") from None + for line in handle: + if not line.strip(): + continue + if "Ali1:" in line: + yield record + break + __read_query_alignment(record, line) + try: + line = next(handle) + __read_positive_alignment(record, line) + line = next(handle) + __read_hit_alignment(record, line) + except StopIteration: + raise ValueError("Unexpected end of stream.") from None + else: + yield record + break + + +class Record: + """Hold information from one compass hit. + + Ali1 is the query, Ali2 the hit. 
+ """ + + def __init__(self): + """Initialize the class.""" + self.query = "" + self.hit = "" + self.gap_threshold = 0 + self.query_length = 0 + self.query_filtered_length = 0 + self.query_nseqs = 0 + self.query_neffseqs = 0 + self.hit_length = 0 + self.hit_filtered_length = 0 + self.hit_nseqs = 0 + self.hit_neffseqs = 0 + self.sw_score = 0 + self.evalue = -1 + self.query_start = -1 + self.hit_start = -1 + self.query_aln = "" + self.hit_aln = "" + self.positives = "" + + def query_coverage(self): + """Return the length of the query covered in the alignment.""" + s = self.query_aln.replace("=", "") + return len(s) + + def hit_coverage(self): + """Return the length of the hit covered in the alignment.""" + s = self.hit_aln.replace("=", "") + return len(s) + + +# Everything below is private + +__regex = { + "names": re.compile(r"Ali1:\s+(\S+)\s+Ali2:\s+(\S+)\s+"), + "threshold": re.compile(r"Threshold of effective gap content in columns: (\S+)"), + "lengths": re.compile( + r"length1=(\S+)\s+filtered_length1=(\S+)" + r"\s+length2=(\S+)\s+filtered_length2=(\S+)" + ), + "profilewidth": re.compile( + r"Nseqs1=(\S+)\s+Neff1=(\S+)\s+Nseqs2=(\S+)\s+Neff2=(\S+)" + ), + "scores": re.compile(r"Smith-Waterman score = (\S+)\s+Evalue = (\S+)"), + "start": re.compile(r"(\d+)"), + "align": re.compile(r"^.{15}(\S+)"), + "positive_alignment": re.compile(r"^.{15}(.+)"), +} + + +def __read_names(record, line): + # Ali1: 60456.blo.gz.aln Ali2: allscop//14984.blo.gz.aln + # ------query----- -------hit------------- + if "Ali1:" not in line: + raise ValueError("Line does not contain 'Ali1:':\n%s" % line) + m = __regex["names"].search(line) + record.query = m.group(1) + record.hit = m.group(2) + + +def __read_threshold(record, line): + if not line.startswith("Threshold"): + raise ValueError("Line does not start with 'Threshold':\n%s" % line) + m = __regex["threshold"].search(line) + record.gap_threshold = float(m.group(1)) + + +def __read_lengths(record, line): + if not line.startswith("length1="): + raise ValueError("Line does not start with 'length1=':\n%s" % line) + m = __regex["lengths"].search(line) + record.query_length = int(m.group(1)) + record.query_filtered_length = float(m.group(2)) + record.hit_length = int(m.group(3)) + record.hit_filtered_length = float(m.group(4)) + + +def __read_profilewidth(record, line): + if "Nseqs1" not in line: + raise ValueError("Line does not contain 'Nseqs1':\n%s" % line) + m = __regex["profilewidth"].search(line) + record.query_nseqs = int(m.group(1)) + record.query_neffseqs = float(m.group(2)) + record.hit_nseqs = int(m.group(3)) + record.hit_neffseqs = float(m.group(4)) + + +def __read_scores(record, line): + if not line.startswith("Smith-Waterman"): + raise ValueError("Line does not start with 'Smith-Waterman':\n%s" % line) + m = __regex["scores"].search(line) + if m: + record.sw_score = int(m.group(1)) + record.evalue = float(m.group(2)) + else: + record.sw_score = 0 + record.evalue = -1.0 + + +def __read_query_alignment(record, line): + m = __regex["start"].search(line) + if m: + record.query_start = int(m.group(1)) + m = __regex["align"].match(line) + assert m is not None, "invalid match" + record.query_aln += m.group(1) + + +def __read_positive_alignment(record, line): + m = __regex["positive_alignment"].match(line) + assert m is not None, "invalid match" + record.positives += m.group(1) + + +def __read_hit_alignment(record, line): + m = __regex["start"].search(line) + if m: + record.hit_start = int(m.group(1)) + m = __regex["align"].match(line) + assert m is not 
None, "invalid match" + record.hit_aln += m.group(1) diff --git a/code/lib/Bio/Compass/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/Compass/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..fea698f Binary files /dev/null and b/code/lib/Bio/Compass/__pycache__/__init__.cpython-37.pyc differ diff --git a/code/lib/Bio/Data/CodonTable.py b/code/lib/Bio/Data/CodonTable.py new file mode 100644 index 0000000..bc006ee --- /dev/null +++ b/code/lib/Bio/Data/CodonTable.py @@ -0,0 +1,1313 @@ +# Copyright 2000 Andrew Dalke. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +"""Codon tables based on those from the NCBI. + +These tables are based on parsing the NCBI file +ftp://ftp.ncbi.nih.gov/entrez/misc/data/gc.prt +using Scripts/update_ncbi_codon_table.py + +Last updated at Version 4.4 (May 2019) +""" + +from Bio.Data import IUPACData + + +unambiguous_dna_by_name = {} +unambiguous_dna_by_id = {} +unambiguous_rna_by_name = {} +unambiguous_rna_by_id = {} +generic_by_name = {} # unambiguous DNA or RNA +generic_by_id = {} # unambiguous DNA or RNA + +ambiguous_dna_by_name = {} +ambiguous_dna_by_id = {} +ambiguous_rna_by_name = {} +ambiguous_rna_by_id = {} +ambiguous_generic_by_name = {} # ambiguous DNA or RNA +ambiguous_generic_by_id = {} # ambiguous DNA or RNA + +# standard IUPAC unambiguous codons +standard_dna_table = None +standard_rna_table = None + + +# In the future, the back_table could return a statistically +# appropriate distribution of codons, so do not cache the results of +# back_table lookups! + + +class TranslationError(Exception): + """Container for translation specific exceptions.""" + + pass + + +class CodonTable: + """A codon-table, or genetic code.""" + + forward_table = {} # only includes codons which actually code + back_table = {} # for back translations + start_codons = [] + stop_codons = [] + + # Not always called from derived classes! + def __init__( + self, + nucleotide_alphabet=None, + protein_alphabet=None, + forward_table=forward_table, + back_table=back_table, + start_codons=start_codons, + stop_codons=stop_codons, + ): + """Initialize the class.""" + self.nucleotide_alphabet = nucleotide_alphabet + self.protein_alphabet = protein_alphabet + self.forward_table = forward_table + self.back_table = back_table + self.start_codons = start_codons + self.stop_codons = stop_codons + + def __str__(self): + """Return a simple text representation of the codon table. + + e.g.:: + + >>> import Bio.Data.CodonTable + >>> print(Bio.Data.CodonTable.standard_dna_table) + Table 1 Standard, SGC0 + + | T | C | A | G | + --+---------+---------+---------+---------+-- + T | TTT F | TCT S | TAT Y | TGT C | T + T | TTC F | TCC S | TAC Y | TGC C | C + ... + G | GTA V | GCA A | GAA E | GGA G | A + G | GTG V | GCG A | GAG E | GGG G | G + --+---------+---------+---------+---------+-- + >>> print(Bio.Data.CodonTable.generic_by_id[1]) + Table 1 Standard, SGC0 + + | U | C | A | G | + --+---------+---------+---------+---------+-- + U | UUU F | UCU S | UAU Y | UGU C | U + U | UUC F | UCC S | UAC Y | UGC C | C + ... 
+ G | GUA V | GCA A | GAA E | GGA G | A + G | GUG V | GCG A | GAG E | GGG G | G + --+---------+---------+---------+---------+-- + """ + if self.id: + answer = "Table %i" % self.id + else: + answer = "Table ID unknown" + if self.names: + answer += " " + ", ".join([x for x in self.names if x]) + + # Use the main four letters (and the conventional ordering) + # even for ambiguous tables + letters = self.nucleotide_alphabet + if letters is not None and "T" in letters: + letters = "TCAG" + else: + # Should be either RNA or generic nucleotides, + # e.g. Bio.Data.CodonTable.generic_by_id[1] + letters = "UCAG" + + # Build the table... + answer += "\n\n" + answer += " |" + "|".join(" %s " % c2 for c2 in letters) + "|" + answer += "\n--+" + "+".join("---------" for c2 in letters) + "+--" + for c1 in letters: + for c3 in letters: + line = c1 + " |" + for c2 in letters: + codon = c1 + c2 + c3 + line += " %s" % codon + if codon in self.stop_codons: + line += " Stop|" + else: + try: + amino = self.forward_table[codon] + except KeyError: + amino = "?" + except TranslationError: + amino = "?" + if codon in self.start_codons: + line += " %s(s)|" % amino + else: + line += " %s |" % amino + line += " " + c3 + answer += "\n" + line + answer += "\n--+" + "+".join("---------" for c2 in letters) + "+--" + return answer + + +def make_back_table(table, default_stop_codon): + """Build a back-table (naive single codon mapping). + + ONLY RETURNS A SINGLE CODON, chosen from the possible alternatives + based on their sort order. + """ + # Do the sort so changes in the hash implementation won't affect + # the result when one amino acid is coded by more than one codon. + back_table = {} + for key in sorted(table): + back_table[table[key]] = key + back_table[None] = default_stop_codon + return back_table + + +class NCBICodonTable(CodonTable): + """Codon table for generic nucleotide sequences.""" + + nucleotide_alphabet = None + protein_alphabet = IUPACData.protein_letters + + def __init__(self, id, names, table, start_codons, stop_codons): + """Initialize the class.""" + self.id = id + self.names = names + self.forward_table = table + self.back_table = make_back_table(table, stop_codons[0]) + self.start_codons = start_codons + self.stop_codons = stop_codons + + def __repr__(self): + """Represent the NCBI codon table class as a string for debugging.""" + return "%s(id=%r, names=%r, ...)" % ( + self.__class__.__name__, + self.id, + self.names, + ) + + +class NCBICodonTableDNA(NCBICodonTable): + """Codon table for unambiguous DNA sequences.""" + + nucleotide_alphabet = IUPACData.unambiguous_dna_letters + + +class NCBICodonTableRNA(NCBICodonTable): + """Codon table for unambiguous RNA sequences.""" + + nucleotide_alphabet = IUPACData.unambiguous_rna_letters + + +# ######## Deal with ambiguous forward translations + + +class AmbiguousCodonTable(CodonTable): + """Base codon table for ambiguous sequences.""" + + def __init__( + self, + codon_table, + ambiguous_nucleotide_alphabet, + ambiguous_nucleotide_values, + ambiguous_protein_alphabet, + ambiguous_protein_values, + ): + """Initialize the class.""" + CodonTable.__init__( + self, + ambiguous_nucleotide_alphabet, + ambiguous_protein_alphabet, + AmbiguousForwardTable( + codon_table.forward_table, + ambiguous_nucleotide_values, + ambiguous_protein_values, + ), + codon_table.back_table, + # These two are WRONG! 
I need to get the + # list of ambiguous codons which code for + # the stop codons XXX + list_ambiguous_codons( + codon_table.start_codons, ambiguous_nucleotide_values + ), + list_ambiguous_codons(codon_table.stop_codons, ambiguous_nucleotide_values), + ) + self._codon_table = codon_table + + # Be sneaky and forward attribute lookups to the original table. + # This lets us get the names, if the original table is an NCBI + # table. + def __getattr__(self, name): + """Forward attribute lookups to the original table.""" + return getattr(self._codon_table, name) + + +def list_possible_proteins(codon, forward_table, ambiguous_nucleotide_values): + """Return all possible encoded amino acids for ambiguous codon.""" + c1, c2, c3 = codon + x1 = ambiguous_nucleotide_values[c1] + x2 = ambiguous_nucleotide_values[c2] + x3 = ambiguous_nucleotide_values[c3] + possible = {} + stops = [] + for y1 in x1: + for y2 in x2: + for y3 in x3: + try: + possible[forward_table[y1 + y2 + y3]] = 1 + except KeyError: + # If tripping over a stop codon + stops.append(y1 + y2 + y3) + if stops: + if possible: + raise TranslationError( + "ambiguous codon %r codes for both proteins and stop codons" % codon + ) + # This is a true stop codon - tell the caller about it + raise KeyError(codon) + return list(possible) + + +def list_ambiguous_codons(codons, ambiguous_nucleotide_values): + """Extend a codon list to include all possible ambiguous codons. + + e.g.:: + + ['TAG', 'TAA'] -> ['TAG', 'TAA', 'TAR'] + ['UAG', 'UGA'] -> ['UAG', 'UGA', 'URA'] + + Note that ['TAG', 'TGA'] -> ['TAG', 'TGA']; this does not add 'TRR' + (which could also mean 'TAA' or 'TGG'). + Thus only two more codons are added in the following: + + e.g.:: + + ['TGA', 'TAA', 'TAG'] -> ['TGA', 'TAA', 'TAG', 'TRA', 'TAR'] + + Returns a new (longer) list of codon strings. + """ + # Note ambiguous_nucleotide_values['R'] = 'AG' (etc) + # This will generate things like 'TRR' from ['TAG', 'TGA'], which + # we don't want to include: + c1_list = sorted( + letter + for letter, meanings in ambiguous_nucleotide_values.items() + if {codon[0] for codon in codons}.issuperset(set(meanings)) + ) + c2_list = sorted( + letter + for letter, meanings in ambiguous_nucleotide_values.items() + if {codon[1] for codon in codons}.issuperset(set(meanings)) + ) + c3_list = sorted( + letter + for letter, meanings in ambiguous_nucleotide_values.items() + if {codon[2] for codon in codons}.issuperset(set(meanings)) + ) + # candidates is a list (not a set) to preserve the iteration order + candidates = [] + for c1 in c1_list: + for c2 in c2_list: + for c3 in c3_list: + codon = c1 + c2 + c3 + if codon not in candidates and codon not in codons: + candidates.append(codon) + answer = codons[:] # copy + # print("Have %i new candidates" % len(candidates)) + for ambig_codon in candidates: + wanted = True + # e.g. 'TRR' -> 'TAA', 'TAG', 'TGA', 'TGG' + for codon in [ + c1 + c2 + c3 + for c1 in ambiguous_nucleotide_values[ambig_codon[0]] + for c2 in ambiguous_nucleotide_values[ambig_codon[1]] + for c3 in ambiguous_nucleotide_values[ambig_codon[2]] + ]: + if codon not in codons: + # This ambiguous codon can code for a non-stop, exclude it! 
+ wanted = False + # print("Rejecting %s" % ambig_codon) + continue + if wanted: + answer.append(ambig_codon) + return answer + + +assert list_ambiguous_codons(["TGA", "TAA"], IUPACData.ambiguous_dna_values) == [ + "TGA", + "TAA", + "TRA", +] +assert list_ambiguous_codons(["TAG", "TGA"], IUPACData.ambiguous_dna_values) == [ + "TAG", + "TGA", +] +assert list_ambiguous_codons(["TAG", "TAA"], IUPACData.ambiguous_dna_values) == [ + "TAG", + "TAA", + "TAR", +] +assert list_ambiguous_codons(["UAG", "UAA"], IUPACData.ambiguous_rna_values) == [ + "UAG", + "UAA", + "UAR", +] +assert list_ambiguous_codons(["TGA", "TAA", "TAG"], IUPACData.ambiguous_dna_values) == [ + "TGA", + "TAA", + "TAG", + "TAR", + "TRA", +] + +# Forward translation is "onto", that is, any given codon always maps +# to the same protein, or it doesn't map at all. Thus, I can build +# off of an existing table to produce the ambiguous mappings. +# +# This handles the general case. Perhaps it's overkill? +# >>> t = CodonTable.ambiguous_dna_by_id[1] +# >>> t.forward_table["AAT"] +# 'N' +# >>> t.forward_table["GAT"] +# 'D' +# >>> t.forward_table["RAT"] +# 'B' +# >>> t.forward_table["YTA"] +# 'L' + + +class AmbiguousForwardTable: + """Forward table for translation of ambiguous nucleotide sequences.""" + + def __init__(self, forward_table, ambiguous_nucleotide, ambiguous_protein): + """Initialize the class.""" + self.forward_table = forward_table + + self.ambiguous_nucleotide = ambiguous_nucleotide + self.ambiguous_protein = ambiguous_protein + + inverted = {} + for name, val in ambiguous_protein.items(): + for c in val: + x = inverted.get(c, {}) + x[name] = 1 + inverted[c] = x + for name, val in inverted.items(): + inverted[name] = list(val) + self._inverted = inverted + + self._cache = {} + + def __contains__(self, codon): + """Check if codon works as key for ambiguous forward_table. + + Only returns 'True' if forward_table[codon] returns a value. + """ + try: + self.__getitem__(codon) + return True + except (KeyError, TranslationError): + return False + + def get(self, codon, failobj=None): + """Implement get for dictionary-like behaviour.""" + try: + return self.__getitem__(codon) + except KeyError: + return failobj + + def __getitem__(self, codon): + """Implement dictionary-like behaviour for AmbiguousForwardTable. + + forward_table[codon] will either return an amino acid letter, + raise a KeyError (if the codon does not encode an amino acid), + or raise a TranslationError (if the codon does encode an amino + acid but is also a stop codon, or encodes several amino acids + for which no unique letter is available in the given alphabet). + """ + try: + x = self._cache[codon] + except KeyError: + pass + else: + if x is TranslationError: + raise TranslationError(codon) # no unique translation + if x is KeyError: + raise KeyError(codon) # it's a stop codon + return x + try: + x = self.forward_table[codon] + self._cache[codon] = x + return x + except KeyError: + pass + + # XXX Need to make part of this into a method which returns + # a list of all possible encodings for a codon! + try: + possible = list_possible_proteins( + codon, self.forward_table, self.ambiguous_nucleotide + ) + except KeyError: + self._cache[codon] = KeyError + raise KeyError(codon) from None # stop codon + except TranslationError: + self._cache[codon] = TranslationError + raise TranslationError(codon) # does not code + assert len(possible) > 0, "unambiguous codons must code" + + # Hah! 
Only one possible protein, so use it + if len(possible) == 1: + self._cache[codon] = possible[0] + return possible[0] + + # See if there's an ambiguous protein encoding for the multiples. + # Find residues which exist in every coding set. + ambiguous_possible = {} + for amino in possible: + for term in self._inverted[amino]: + ambiguous_possible[term] = ambiguous_possible.get(term, 0) + 1 + + n = len(possible) + possible = [] + for amino, val in ambiguous_possible.items(): + if val == n: + possible.append(amino) + + # No amino acid encoding for the results + if len(possible) == 0: + self._cache[codon] = TranslationError + raise TranslationError(codon) # no valid translation + + # All of these are valid, so choose one + # To be unique, sort by smallest ambiguity then alphabetically + # Can get this if "X" encodes for everything. + # def _sort(x, y, table = self.ambiguous_protein): + # a = cmp(len(table[x]), len(table[y])) + # if a == 0: + # return cmp(x, y) + # return a + + # Sort by key is 2.x and 3.x compatible + possible.sort(key=lambda x: (len(self.ambiguous_protein[x]), x)) + + x = possible[0] + self._cache[codon] = x + return x + + +def register_ncbi_table(name, alt_name, id, table, start_codons, stop_codons): + """Turn codon table data into objects (PRIVATE). + + The data is stored in the dictionaries. + """ + # In most cases names are divided by "; ", however there is also + # Table 11 'Bacterial, Archaeal and Plant Plastid Code', previously + # 'Bacterial and Plant Plastid' which used to be just 'Bacterial' + names = [ + x.strip() for x in name.replace(" and ", "; ").replace(", ", "; ").split("; ") + ] + + dna = NCBICodonTableDNA(id, names + [alt_name], table, start_codons, stop_codons) + ambig_dna = AmbiguousCodonTable( + dna, + IUPACData.ambiguous_dna_letters, + IUPACData.ambiguous_dna_values, + IUPACData.extended_protein_letters, + IUPACData.extended_protein_values, + ) + + # replace all T's with U's for the RNA tables + rna_table = {} + generic_table = {} + for codon, val in table.items(): + generic_table[codon] = val + codon = codon.replace("T", "U") + generic_table[codon] = val + rna_table[codon] = val + rna_start_codons = [] + generic_start_codons = [] + for codon in start_codons: + generic_start_codons.append(codon) + # We need to check if 'T' is in the codon, otherwise + # generic_start_codons may contain duplicates + if "T" in codon: + codon = codon.replace("T", "U") + generic_start_codons.append(codon) + rna_start_codons.append(codon) + rna_stop_codons = [] + generic_stop_codons = [] + for codon in stop_codons: + generic_stop_codons.append(codon) + if "T" in codon: + codon = codon.replace("T", "U") + generic_stop_codons.append(codon) + rna_stop_codons.append(codon) + + generic = NCBICodonTable( + id, names + [alt_name], generic_table, generic_start_codons, generic_stop_codons + ) + + # The following isn't very elegant, but seems to work nicely. 
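+ # For instance, merging {"T": "U"} into the ambiguous RNA values below + # lets the generic ambiguous table accept DNA-style spellings as well: + # under table 1, both "TAY" and "UAY" should then resolve to "Y", + # because "T" expands to the single meaning "U".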
+ _merged_values = dict(IUPACData.ambiguous_rna_values.items()) + _merged_values["T"] = "U" + ambig_generic = AmbiguousCodonTable( + generic, + None, + _merged_values, + IUPACData.extended_protein_letters, + IUPACData.extended_protein_values, + ) + + rna = NCBICodonTableRNA( + id, names + [alt_name], rna_table, rna_start_codons, rna_stop_codons + ) + + ambig_rna = AmbiguousCodonTable( + rna, + IUPACData.ambiguous_rna_letters, + IUPACData.ambiguous_rna_values, + IUPACData.extended_protein_letters, + IUPACData.extended_protein_values, + ) + + if id == 1: + global standard_dna_table, standard_rna_table + standard_dna_table = dna + standard_rna_table = rna + + unambiguous_dna_by_id[id] = dna + unambiguous_rna_by_id[id] = rna + generic_by_id[id] = generic + ambiguous_dna_by_id[id] = ambig_dna + ambiguous_rna_by_id[id] = ambig_rna + ambiguous_generic_by_id[id] = ambig_generic + + if alt_name is not None: + names.append(alt_name) + + for name in names: + unambiguous_dna_by_name[name] = dna + unambiguous_rna_by_name[name] = rna + generic_by_name[name] = generic + ambiguous_dna_by_name[name] = ambig_dna + ambiguous_rna_by_name[name] = ambig_rna + ambiguous_generic_by_name[name] = ambig_generic + + +# The rest of this file is automatically generated, here we turn off +# black formatting in order to keep the codon tables compact. +# +# fmt: off + +########################################################################## +# Start of auto-generated output from Scripts/update_ncbi_codon_table.py # +########################################################################## + +# Data from NCBI genetic code table version 4.5 + +register_ncbi_table( + name="Standard", + alt_name="SGC0", + id=1, + table={ + "TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L", + "TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S", + "TAT": "Y", "TAC": "Y", # noqa: E241 + "TGT": "C", "TGC": "C", "TGG": "W", # noqa: E241 + "CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L", + "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P", + "CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q", + "CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R", + "ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M", + "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T", + "AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K", + "AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R", + "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V", + "GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A", + "GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E", + "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G", + }, + stop_codons=["TAA", "TAG", "TGA"], + start_codons=["TTG", "CTG", "ATG"], +) + +register_ncbi_table( + name="Vertebrate Mitochondrial", + alt_name="SGC1", + id=2, + table={ + "TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L", + "TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S", + "TAT": "Y", "TAC": "Y", # noqa: E241 + "TGT": "C", "TGC": "C", "TGA": "W", "TGG": "W", + "CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L", + "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P", + "CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q", + "CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R", + "ATT": "I", "ATC": "I", "ATA": "M", "ATG": "M", + "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T", + "AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K", + "AGT": "S", "AGC": "S", # noqa: E241 + "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V", + "GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A", + "GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E", + "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G", + }, + stop_codons=["TAA", "TAG", "AGA", "AGG"], + start_codons=["ATT", "ATC", "ATA", 
"ATG", "GTG"], +) + +register_ncbi_table( + name="Yeast Mitochondrial", + alt_name="SGC2", + id=3, + table={ + "TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L", + "TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S", + "TAT": "Y", "TAC": "Y", # noqa: E241 + "TGT": "C", "TGC": "C", "TGA": "W", "TGG": "W", + "CTT": "T", "CTC": "T", "CTA": "T", "CTG": "T", + "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P", + "CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q", + "CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R", + "ATT": "I", "ATC": "I", "ATA": "M", "ATG": "M", + "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T", + "AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K", + "AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R", + "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V", + "GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A", + "GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E", + "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G", + }, + stop_codons=["TAA", "TAG"], + start_codons=["ATA", "ATG", "GTG"], +) + +register_ncbi_table( + name="Mold Mitochondrial; Protozoan Mitochondrial; Coelenterate " + "Mitochondrial; Mycoplasma; Spiroplasma", + alt_name="SGC3", + id=4, + table={ + "TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L", + "TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S", + "TAT": "Y", "TAC": "Y", # noqa: E241 + "TGT": "C", "TGC": "C", "TGA": "W", "TGG": "W", + "CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L", + "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P", + "CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q", + "CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R", + "ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M", + "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T", + "AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K", + "AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R", + "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V", + "GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A", + "GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E", + "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G", + }, + stop_codons=["TAA", "TAG"], + start_codons=["TTA", "TTG", "CTG", "ATT", "ATC", "ATA", "ATG", "GTG"], +) + +register_ncbi_table( + name="Invertebrate Mitochondrial", + alt_name="SGC4", + id=5, + table={ + "TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L", + "TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S", + "TAT": "Y", "TAC": "Y", # noqa: E241 + "TGT": "C", "TGC": "C", "TGA": "W", "TGG": "W", + "CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L", + "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P", + "CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q", + "CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R", + "ATT": "I", "ATC": "I", "ATA": "M", "ATG": "M", + "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T", + "AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K", + "AGT": "S", "AGC": "S", "AGA": "S", "AGG": "S", + "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V", + "GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A", + "GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E", + "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G", + }, + stop_codons=["TAA", "TAG"], + start_codons=["TTG", "ATT", "ATC", "ATA", "ATG", "GTG"], +) + +register_ncbi_table( + name="Ciliate Nuclear; Dasycladacean Nuclear; Hexamita Nuclear", + alt_name="SGC5", + id=6, + table={ + "TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L", + "TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S", + "TAT": "Y", "TAC": "Y", "TAA": "Q", "TAG": "Q", + "TGT": "C", "TGC": "C", "TGG": "W", # noqa: E241 + "CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L", + "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P", + "CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q", + "CGT": "R", "CGC": "R", "CGA": "R", "CGG": 
"R", + "ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M", + "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T", + "AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K", + "AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R", + "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V", + "GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A", + "GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E", + "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G", + }, + stop_codons=["TGA"], + start_codons=["ATG"], +) + +register_ncbi_table( + name="Echinoderm Mitochondrial; Flatworm Mitochondrial", + alt_name="SGC8", + id=9, + table={ + "TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L", + "TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S", + "TAT": "Y", "TAC": "Y", # noqa: E241 + "TGT": "C", "TGC": "C", "TGA": "W", "TGG": "W", + "CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L", + "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P", + "CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q", + "CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R", + "ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M", + "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T", + "AAT": "N", "AAC": "N", "AAA": "N", "AAG": "K", + "AGT": "S", "AGC": "S", "AGA": "S", "AGG": "S", + "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V", + "GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A", + "GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E", + "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G", + }, + stop_codons=["TAA", "TAG"], + start_codons=["ATG", "GTG"], +) + +register_ncbi_table( + name="Euplotid Nuclear", + alt_name="SGC9", + id=10, + table={ + "TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L", + "TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S", + "TAT": "Y", "TAC": "Y", # noqa: E241 + "TGT": "C", "TGC": "C", "TGA": "C", "TGG": "W", + "CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L", + "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P", + "CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q", + "CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R", + "ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M", + "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T", + "AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K", + "AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R", + "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V", + "GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A", + "GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E", + "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G", + }, + stop_codons=["TAA", "TAG"], + start_codons=["ATG"], +) + +register_ncbi_table( + name="Bacterial, Archaeal and Plant Plastid", + alt_name=None, + id=11, + table={ + "TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L", + "TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S", + "TAT": "Y", "TAC": "Y", # noqa: E241 + "TGT": "C", "TGC": "C", "TGG": "W", # noqa: E241 + "CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L", + "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P", + "CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q", + "CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R", + "ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M", + "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T", + "AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K", + "AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R", + "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V", + "GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A", + "GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E", + "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G", + }, + stop_codons=["TAA", "TAG", "TGA"], + start_codons=["TTG", "CTG", "ATT", "ATC", "ATA", "ATG", "GTG"], +) + +register_ncbi_table( + name="Alternative Yeast Nuclear", + alt_name=None, + id=12, + table={ + "TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L", + "TCT": "S", "TCC": 
"S", "TCA": "S", "TCG": "S", + "TAT": "Y", "TAC": "Y", # noqa: E241 + "TGT": "C", "TGC": "C", "TGG": "W", # noqa: E241 + "CTT": "L", "CTC": "L", "CTA": "L", "CTG": "S", + "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P", + "CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q", + "CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R", + "ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M", + "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T", + "AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K", + "AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R", + "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V", + "GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A", + "GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E", + "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G", + }, + stop_codons=["TAA", "TAG", "TGA"], + start_codons=["CTG", "ATG"], +) + +register_ncbi_table( + name="Ascidian Mitochondrial", + alt_name=None, + id=13, + table={ + "TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L", + "TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S", + "TAT": "Y", "TAC": "Y", # noqa: E241 + "TGT": "C", "TGC": "C", "TGA": "W", "TGG": "W", + "CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L", + "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P", + "CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q", + "CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R", + "ATT": "I", "ATC": "I", "ATA": "M", "ATG": "M", + "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T", + "AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K", + "AGT": "S", "AGC": "S", "AGA": "G", "AGG": "G", + "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V", + "GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A", + "GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E", + "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G", + }, + stop_codons=["TAA", "TAG"], + start_codons=["TTG", "ATA", "ATG", "GTG"], +) + +register_ncbi_table( + name="Alternative Flatworm Mitochondrial", + alt_name=None, + id=14, + table={ + "TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L", + "TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S", + "TAT": "Y", "TAC": "Y", "TAA": "Y", # noqa: E241 + "TGT": "C", "TGC": "C", "TGA": "W", "TGG": "W", + "CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L", + "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P", + "CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q", + "CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R", + "ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M", + "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T", + "AAT": "N", "AAC": "N", "AAA": "N", "AAG": "K", + "AGT": "S", "AGC": "S", "AGA": "S", "AGG": "S", + "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V", + "GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A", + "GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E", + "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G", + }, + stop_codons=["TAG"], + start_codons=["ATG"], +) + +register_ncbi_table( + name="Blepharisma Macronuclear", + alt_name=None, + id=15, + table={ + "TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L", + "TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S", + "TAT": "Y", "TAC": "Y", "TAG": "Q", # noqa: E241 + "TGT": "C", "TGC": "C", "TGG": "W", # noqa: E241 + "CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L", + "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P", + "CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q", + "CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R", + "ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M", + "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T", + "AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K", + "AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R", + "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V", + "GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A", + "GAT": "D", "GAC": "D", "GAA": 
"E", "GAG": "E", + "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G", + }, + stop_codons=["TAA", "TGA"], + start_codons=["ATG"], +) + +register_ncbi_table( + name="Chlorophycean Mitochondrial", + alt_name=None, + id=16, + table={ + "TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L", + "TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S", + "TAT": "Y", "TAC": "Y", "TAG": "L", # noqa: E241 + "TGT": "C", "TGC": "C", "TGG": "W", # noqa: E241 + "CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L", + "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P", + "CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q", + "CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R", + "ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M", + "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T", + "AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K", + "AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R", + "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V", + "GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A", + "GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E", + "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G", + }, + stop_codons=["TAA", "TGA"], + start_codons=["ATG"], +) + +register_ncbi_table( + name="Trematode Mitochondrial", + alt_name=None, + id=21, + table={ + "TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L", + "TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S", + "TAT": "Y", "TAC": "Y", # noqa: E241 + "TGT": "C", "TGC": "C", "TGA": "W", "TGG": "W", + "CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L", + "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P", + "CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q", + "CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R", + "ATT": "I", "ATC": "I", "ATA": "M", "ATG": "M", + "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T", + "AAT": "N", "AAC": "N", "AAA": "N", "AAG": "K", + "AGT": "S", "AGC": "S", "AGA": "S", "AGG": "S", + "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V", + "GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A", + "GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E", + "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G", + }, + stop_codons=["TAA", "TAG"], + start_codons=["ATG", "GTG"], +) + +register_ncbi_table( + name="Scenedesmus obliquus Mitochondrial", + alt_name=None, + id=22, + table={ + "TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L", + "TCT": "S", "TCC": "S", "TCG": "S", # noqa: E241 + "TAT": "Y", "TAC": "Y", "TAG": "L", # noqa: E241 + "TGT": "C", "TGC": "C", "TGG": "W", # noqa: E241 + "CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L", + "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P", + "CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q", + "CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R", + "ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M", + "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T", + "AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K", + "AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R", + "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V", + "GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A", + "GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E", + "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G", + }, + stop_codons=["TCA", "TAA", "TGA"], + start_codons=["ATG"], +) + +register_ncbi_table( + name="Thraustochytrium Mitochondrial", + alt_name=None, + id=23, + table={ + "TTT": "F", "TTC": "F", "TTG": "L", # noqa: E241 + "TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S", + "TAT": "Y", "TAC": "Y", # noqa: E241 + "TGT": "C", "TGC": "C", "TGG": "W", # noqa: E241 + "CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L", + "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P", + "CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q", + "CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R", + "ATT": "I", "ATC": "I", "ATA": "I", 
"ATG": "M", + "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T", + "AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K", + "AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R", + "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V", + "GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A", + "GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E", + "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G", + }, + stop_codons=["TTA", "TAA", "TAG", "TGA"], + start_codons=["ATT", "ATG", "GTG"], +) + +register_ncbi_table( + name="Pterobranchia Mitochondrial", + alt_name=None, + id=24, + table={ + "TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L", + "TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S", + "TAT": "Y", "TAC": "Y", # noqa: E241 + "TGT": "C", "TGC": "C", "TGA": "W", "TGG": "W", + "CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L", + "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P", + "CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q", + "CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R", + "ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M", + "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T", + "AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K", + "AGT": "S", "AGC": "S", "AGA": "S", "AGG": "K", + "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V", + "GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A", + "GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E", + "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G", + }, + stop_codons=["TAA", "TAG"], + start_codons=["TTG", "CTG", "ATG", "GTG"], +) + +register_ncbi_table( + name="Candidate Division SR1 and Gracilibacteria", + alt_name=None, + id=25, + table={ + "TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L", + "TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S", + "TAT": "Y", "TAC": "Y", # noqa: E241 + "TGT": "C", "TGC": "C", "TGA": "G", "TGG": "W", + "CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L", + "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P", + "CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q", + "CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R", + "ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M", + "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T", + "AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K", + "AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R", + "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V", + "GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A", + "GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E", + "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G", + }, + stop_codons=["TAA", "TAG"], + start_codons=["TTG", "ATG", "GTG"], +) + +register_ncbi_table( + name="Pachysolen tannophilus Nuclear", + alt_name=None, + id=26, + table={ + "TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L", + "TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S", + "TAT": "Y", "TAC": "Y", # noqa: E241 + "TGT": "C", "TGC": "C", "TGG": "W", # noqa: E241 + "CTT": "L", "CTC": "L", "CTA": "L", "CTG": "A", + "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P", + "CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q", + "CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R", + "ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M", + "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T", + "AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K", + "AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R", + "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V", + "GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A", + "GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E", + "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G", + }, + stop_codons=["TAA", "TAG", "TGA"], + start_codons=["CTG", "ATG"], +) + +register_ncbi_table( + name="Karyorelict Nuclear", + alt_name=None, + id=27, + table={ + "TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L", + "TCT": "S", "TCC": "S", "TCA": "S", "TCG": 
"S", + "TAT": "Y", "TAC": "Y", "TAA": "Q", "TAG": "Q", + "TGT": "C", "TGC": "C", "TGA": "W", "TGG": "W", + "CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L", + "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P", + "CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q", + "CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R", + "ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M", + "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T", + "AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K", + "AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R", + "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V", + "GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A", + "GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E", + "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G", + }, + stop_codons=["TGA"], + start_codons=["ATG"], +) + +register_ncbi_table( + name="Condylostoma Nuclear", + alt_name=None, + id=28, + table={ + "TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L", + "TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S", + "TAT": "Y", "TAC": "Y", "TAA": "Q", "TAG": "Q", + "TGT": "C", "TGC": "C", "TGA": "W", "TGG": "W", + "CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L", + "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P", + "CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q", + "CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R", + "ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M", + "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T", + "AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K", + "AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R", + "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V", + "GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A", + "GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E", + "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G", + }, + stop_codons=["TAA", "TAG", "TGA"], + start_codons=["ATG"], +) + +register_ncbi_table( + name="Mesodinium Nuclear", + alt_name=None, + id=29, + table={ + "TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L", + "TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S", + "TAT": "Y", "TAC": "Y", "TAA": "Y", "TAG": "Y", + "TGT": "C", "TGC": "C", "TGG": "W", # noqa: E241 + "CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L", + "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P", + "CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q", + "CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R", + "ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M", + "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T", + "AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K", + "AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R", + "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V", + "GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A", + "GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E", + "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G", + }, + stop_codons=["TGA"], + start_codons=["ATG"], +) + +register_ncbi_table( + name="Peritrich Nuclear", + alt_name=None, + id=30, + table={ + "TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L", + "TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S", + "TAT": "Y", "TAC": "Y", "TAA": "E", "TAG": "E", + "TGT": "C", "TGC": "C", "TGG": "W", # noqa: E241 + "CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L", + "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P", + "CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q", + "CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R", + "ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M", + "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T", + "AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K", + "AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R", + "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V", + "GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A", + "GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E", + "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G", 
+ }, + stop_codons=["TGA"], + start_codons=["ATG"], +) + +register_ncbi_table( + name="Blastocrithidia Nuclear", + alt_name=None, + id=31, + table={ + "TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L", + "TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S", + "TAT": "Y", "TAC": "Y", "TAA": "E", "TAG": "E", + "TGT": "C", "TGC": "C", "TGA": "W", "TGG": "W", + "CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L", + "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P", + "CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q", + "CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R", + "ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M", + "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T", + "AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K", + "AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R", + "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V", + "GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A", + "GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E", + "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G", + }, + stop_codons=["TAA", "TAG"], + start_codons=["ATG"], +) + +register_ncbi_table( + name="Balanophoraceae Plastid", + alt_name=None, + id=32, + table={ + "TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L", + "TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S", + "TAT": "Y", "TAC": "Y", "TAG": "W", # noqa: E241 + "TGT": "C", "TGC": "C", "TGG": "W", # noqa: E241 + "CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L", + "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P", + "CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q", + "CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R", + "ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M", + "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T", + "AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K", + "AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R", + "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V", + "GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A", + "GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E", + "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G", + }, + stop_codons=["TAA", "TGA"], + start_codons=["TTG", "CTG", "ATT", "ATC", "ATA", "ATG", "GTG"], +) + +register_ncbi_table( + name="Cephalodiscidae Mitochondrial", + alt_name=None, + id=33, + table={ + "TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L", + "TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S", + "TAT": "Y", "TAC": "Y", "TAA": "Y", # noqa: E241 + "TGT": "C", "TGC": "C", "TGA": "W", "TGG": "W", + "CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L", + "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P", + "CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q", + "CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R", + "ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M", + "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T", + "AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K", + "AGT": "S", "AGC": "S", "AGA": "S", "AGG": "K", + "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V", + "GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A", + "GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E", + "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G", + }, + stop_codons=["TAG"], + start_codons=["TTG", "CTG", "ATG", "GTG"], +) + +######################################################################## +# End of auto-generated output from Scripts/update_ncbi_codon_table.py # +######################################################################## diff --git a/code/lib/Bio/Data/IUPACData.py b/code/lib/Bio/Data/IUPACData.py new file mode 100644 index 0000000..42bf7a9 --- /dev/null +++ b/code/lib/Bio/Data/IUPACData.py @@ -0,0 +1,423 @@ +# Copyright 2000 Andrew Dalke. All rights reserved. 
+# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +"""Information about the IUPAC alphabets.""" + + +protein_letters = "ACDEFGHIKLMNPQRSTVWY" +extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO" +# B = "Asx"; aspartic acid or asparagine (D or N) +# X = "Xxx"; unknown or 'other' amino acid +# Z = "Glx"; glutamic acid or glutamine (E or Q) +# http://www.chem.qmul.ac.uk/iupac/AminoAcid/A2021.html#AA212 +# +# J = "Xle"; leucine or isoleucine (L or I, used in NMR) +# Mentioned in http://www.chem.qmul.ac.uk/iubmb/newsletter/1999/item3.html +# Also the International Nucleotide Sequence Database Collaboration (INSDC) +# (i.e. GenBank, EMBL, DDBJ) adopted this in 2006 +# http://www.ddbj.nig.ac.jp/insdc/icm2006-e.html +# +# Xle (J); Leucine or Isoleucine +# The residue abbreviations, Xle (the three-letter abbreviation) and J +# (the one-letter abbreviation) are reserved for the case that cannot +# experimentally distinguish leucine from isoleucine. +# +# U = "Sec"; selenocysteine +# http://www.chem.qmul.ac.uk/iubmb/newsletter/1999/item3.html +# +# O = "Pyl"; pyrrolysine +# http://www.chem.qmul.ac.uk/iubmb/newsletter/2009.html#item35 + +protein_letters_1to3 = { + "A": "Ala", + "C": "Cys", + "D": "Asp", + "E": "Glu", + "F": "Phe", + "G": "Gly", + "H": "His", + "I": "Ile", + "K": "Lys", + "L": "Leu", + "M": "Met", + "N": "Asn", + "P": "Pro", + "Q": "Gln", + "R": "Arg", + "S": "Ser", + "T": "Thr", + "V": "Val", + "W": "Trp", + "Y": "Tyr", +} +protein_letters_1to3_extended = dict( + list(protein_letters_1to3.items()) + + list( + {"B": "Asx", "X": "Xaa", "Z": "Glx", "J": "Xle", "U": "Sec", "O": "Pyl"}.items() + ) +) + +protein_letters_3to1 = {x[1]: x[0] for x in protein_letters_1to3.items()} +protein_letters_3to1_extended = { + x[1]: x[0] for x in protein_letters_1to3_extended.items() +} + +ambiguous_dna_letters = "GATCRYWSMKHBVDN" +unambiguous_dna_letters = "GATC" +ambiguous_rna_letters = "GAUCRYWSMKHBVDN" +unambiguous_rna_letters = "GAUC" + +# B == 5-bromouridine +# D == 5,6-dihydrouridine +# S == thiouridine +# W == wyosine +extended_dna_letters = "GATCBDSW" + +# are there extended forms? 
+# extended_rna_letters = "GAUCBDSW" + +# "X" is included in the following _values and _complement dictionaries, +# for historical reasons although it is not an IUPAC nucleotide, +# and so is not in the corresponding _letters strings above +ambiguous_dna_values = { + "A": "A", + "C": "C", + "G": "G", + "T": "T", + "M": "AC", + "R": "AG", + "W": "AT", + "S": "CG", + "Y": "CT", + "K": "GT", + "V": "ACG", + "H": "ACT", + "D": "AGT", + "B": "CGT", + "X": "GATC", + "N": "GATC", +} +ambiguous_rna_values = { + "A": "A", + "C": "C", + "G": "G", + "U": "U", + "M": "AC", + "R": "AG", + "W": "AU", + "S": "CG", + "Y": "CU", + "K": "GU", + "V": "ACG", + "H": "ACU", + "D": "AGU", + "B": "CGU", + "X": "GAUC", + "N": "GAUC", +} + +ambiguous_dna_complement = { + "A": "T", + "C": "G", + "G": "C", + "T": "A", + "M": "K", + "R": "Y", + "W": "W", + "S": "S", + "Y": "R", + "K": "M", + "V": "B", + "H": "D", + "D": "H", + "B": "V", + "X": "X", + "N": "N", +} + +ambiguous_rna_complement = { + "A": "U", + "C": "G", + "G": "C", + "U": "A", + "M": "K", + "R": "Y", + "W": "W", + "S": "S", + "Y": "R", + "K": "M", + "V": "B", + "H": "D", + "D": "H", + "B": "V", + "X": "X", + "N": "N", +} + + +def _make_ranges(mydict): + d = {} + for key, value in mydict.items(): + d[key] = (value, value) + return d + + +# Mass data taken from PubChem + + +# Average masses of monophosphate deoxy nucleotides +unambiguous_dna_weights = {"A": 331.2218, "C": 307.1971, "G": 347.2212, "T": 322.2085} + +# Monoisotopic masses of monophosphate deoxy nucleotides +monoisotopic_unambiguous_dna_weights = { + "A": 331.06817, + "C": 307.056936, + "G": 347.063084, + "T": 322.056602, +} + +unambiguous_dna_weight_ranges = _make_ranges(unambiguous_dna_weights) + +unambiguous_rna_weights = {"A": 347.2212, "C": 323.1965, "G": 363.2206, "U": 324.1813} + +monoisotopic_unambiguous_rna_weights = { + "A": 347.063084, + "C": 323.051851, + "G": 363.057999, + "U": 324.035867, +} + +unambiguous_rna_weight_ranges = _make_ranges(unambiguous_rna_weights) + + +def _make_ambiguous_ranges(mydict, weight_table): + range_d = {} + avg_d = {} + for letter, values in mydict.items(): + # Following line is a quick hack to skip undefined weights for U and O + if len(values) == 1 and values[0] not in weight_table: + continue + + weights = [weight_table.get(x) for x in values] + range_d[letter] = (min(weights), max(weights)) + total_w = 0.0 + for w in weights: + total_w = total_w + w + avg_d[letter] = total_w / len(weights) + return range_d, avg_d + + +ambiguous_dna_weight_ranges, avg_ambiguous_dna_weights = _make_ambiguous_ranges( + ambiguous_dna_values, unambiguous_dna_weights +) + +ambiguous_rna_weight_ranges, avg_ambiguous_rna_weights = _make_ambiguous_ranges( + ambiguous_rna_values, unambiguous_rna_weights +) + +protein_weights = { + "A": 89.0932, + "C": 121.1582, + "D": 133.1027, + "E": 147.1293, + "F": 165.1891, + "G": 75.0666, + "H": 155.1546, + "I": 131.1729, + "K": 146.1876, + "L": 131.1729, + "M": 149.2113, + "N": 132.1179, + "O": 255.3134, + "P": 115.1305, + "Q": 146.1445, + "R": 174.201, + "S": 105.0926, + "T": 119.1192, + "U": 168.0532, + "V": 117.1463, + "W": 204.2252, + "Y": 181.1885, +} + +monoisotopic_protein_weights = { + "A": 89.047678, + "C": 121.019749, + "D": 133.037508, + "E": 147.053158, + "F": 165.078979, + "G": 75.032028, + "H": 155.069477, + "I": 131.094629, + "K": 146.105528, + "L": 131.094629, + "M": 149.051049, + "N": 132.053492, + "O": 255.158292, + "P": 115.063329, + "Q": 146.069142, + "R": 174.111676, + "S": 105.042593, + "T": 119.058243, + "U": 
168.964203, + "V": 117.078979, + "W": 204.089878, + "Y": 181.073893, +} + +extended_protein_values = { + "A": "A", + "B": "ND", + "C": "C", + "D": "D", + "E": "E", + "F": "F", + "G": "G", + "H": "H", + "I": "I", + "J": "IL", + "K": "K", + "L": "L", + "M": "M", + "N": "N", + "O": "O", + "P": "P", + "Q": "Q", + "R": "R", + "S": "S", + "T": "T", + "U": "U", + "V": "V", + "W": "W", + "X": "ACDEFGHIKLMNPQRSTVWY", + # TODO - Include U and O in the possible values of X? + # This could alter the extended_protein_weight_ranges ... + # by MP: Won't do this, because they are so rare. + "Y": "Y", + "Z": "QE", +} + +protein_weight_ranges = _make_ranges(protein_weights) + +extended_protein_weight_ranges, avg_extended_protein_weights = _make_ambiguous_ranges( + extended_protein_values, protein_weights +) + + +# For Center of Mass Calculation. +# Taken from http://www.chem.qmul.ac.uk/iupac/AtWt/ & PyMol +atom_weights = { + "H": 1.00794, + "D": 2.01410, + "He": 4.002602, + "Li": 6.941, + "Be": 9.012182, + "B": 10.811, + "C": 12.0107, + "N": 14.0067, + "O": 15.9994, + "F": 18.9984032, + "Ne": 20.1797, + "Na": 22.989770, + "Mg": 24.3050, + "Al": 26.981538, + "Si": 28.0855, + "P": 30.973761, + "S": 32.065, + "Cl": 35.453, + "Ar": 39.948, + "K": 39.0983, + "Ca": 40.078, + "Sc": 44.955910, + "Ti": 47.867, + "V": 50.9415, + "Cr": 51.9961, + "Mn": 54.938049, + "Fe": 55.845, + "Co": 58.933200, + "Ni": 58.6934, + "Cu": 63.546, + "Zn": 65.39, + "Ga": 69.723, + "Ge": 72.64, + "As": 74.92160, + "Se": 78.96, + "Br": 79.904, + "Kr": 83.80, + "Rb": 85.4678, + "Sr": 87.62, + "Y": 88.90585, + "Zr": 91.224, + "Nb": 92.90638, + "Mo": 95.94, + "Tc": 98.0, + "Ru": 101.07, + "Rh": 102.90550, + "Pd": 106.42, + "Ag": 107.8682, + "Cd": 112.411, + "In": 114.818, + "Sn": 118.710, + "Sb": 121.760, + "Te": 127.60, + "I": 126.90447, + "Xe": 131.293, + "Cs": 132.90545, + "Ba": 137.327, + "La": 138.9055, + "Ce": 140.116, + "Pr": 140.90765, + "Nd": 144.24, + "Pm": 145.0, + "Sm": 150.36, + "Eu": 151.964, + "Gd": 157.25, + "Tb": 158.92534, + "Dy": 162.50, + "Ho": 164.93032, + "Er": 167.259, + "Tm": 168.93421, + "Yb": 173.04, + "Lu": 174.967, + "Hf": 178.49, + "Ta": 180.9479, + "W": 183.84, + "Re": 186.207, + "Os": 190.23, + "Ir": 192.217, + "Pt": 195.078, + "Au": 196.96655, + "Hg": 200.59, + "Tl": 204.3833, + "Pb": 207.2, + "Bi": 208.98038, + "Po": 208.98, + "At": 209.99, + "Rn": 222.02, + "Fr": 223.02, + "Ra": 226.03, + "Ac": 227.03, + "Th": 232.0381, + "Pa": 231.03588, + "U": 238.02891, + "Np": 237.05, + "Pu": 244.06, + "Am": 243.06, + "Cm": 247.07, + "Bk": 247.07, + "Cf": 251.08, + "Es": 252.08, + "Fm": 257.10, + "Md": 258.10, + "No": 259.10, + "Lr": 262.11, + "Rf": 261.11, + "Db": 262.11, + "Sg": 266.12, + "Bh": 264.12, + "Hs": 269.13, + "Mt": 268.14, +} diff --git a/code/lib/Bio/Data/SCOPData.py b/code/lib/Bio/Data/SCOPData.py new file mode 100644 index 0000000..79cfd4e --- /dev/null +++ b/code/lib/Bio/Data/SCOPData.py @@ -0,0 +1,277 @@ +# Copyright Lenna Peterson (2012) +# All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. + +"""Additional protein alphabets used in the SCOP database and PDB files. + +See Bio.SCOP for more information about SCOP and Biopython's SCOP module. +""" + +# This file was automatically generated from PDB data. 
+# Black would reformat this to one entry per line, so tell it not to: +# fmt: off +protein_letters_3to1 = { + "00C": "C", "01W": "X", "02K": "A", "03Y": "C", "07O": "C", + "08P": "C", "0A0": "D", "0A1": "Y", "0A2": "K", "0A8": "C", + "0AA": "V", "0AB": "V", "0AC": "G", "0AD": "G", "0AF": "W", + "0AG": "L", "0AH": "S", "0AK": "D", "0AM": "A", "0AP": "C", + "0AU": "U", "0AV": "A", "0AZ": "P", "0BN": "F", "0C ": "C", + "0CS": "A", "0DC": "C", "0DG": "G", "0DT": "T", "0FL": "A", + "0G ": "G", "0NC": "A", "0SP": "A", "0U ": "U", "0YG": "YG", + "10C": "C", "125": "U", "126": "U", "127": "U", "128": "N", + "12A": "A", "143": "C", "175": "ASG", "193": "X", "1AP": "A", + "1MA": "A", "1MG": "G", "1PA": "F", "1PI": "A", "1PR": "N", + "1SC": "C", "1TQ": "W", "1TY": "Y", "1X6": "S", "200": "F", + "23F": "F", "23S": "X", "26B": "T", "2AD": "X", "2AG": "A", + "2AO": "X", "2AR": "A", "2AS": "X", "2AT": "T", "2AU": "U", + "2BD": "I", "2BT": "T", "2BU": "A", "2CO": "C", "2DA": "A", + "2DF": "N", "2DM": "N", "2DO": "X", "2DT": "T", "2EG": "G", + "2FE": "N", "2FI": "N", "2FM": "M", "2GT": "T", "2HF": "H", + "2LU": "L", "2MA": "A", "2MG": "G", "2ML": "L", "2MR": "R", + "2MT": "P", "2MU": "U", "2NT": "T", "2OM": "U", "2OT": "T", + "2PI": "X", "2PR": "G", "2SA": "N", "2SI": "X", "2ST": "T", + "2TL": "T", "2TY": "Y", "2VA": "V", "2XA": "C", "32S": "X", + "32T": "X", "3AH": "H", "3AR": "X", "3CF": "F", "3DA": "A", + "3DR": "N", "3GA": "A", "3MD": "D", "3ME": "U", "3NF": "Y", + "3QN": "K", "3TY": "X", "3XH": "G", "4AC": "N", "4BF": "Y", + "4CF": "F", "4CY": "M", "4DP": "W", "4F3": "GYG", "4FB": "P", + "4FW": "W", "4HT": "W", "4IN": "W", "4MF": "N", "4MM": "X", + "4OC": "C", "4PC": "C", "4PD": "C", "4PE": "C", "4PH": "F", + "4SC": "C", "4SU": "U", "4TA": "N", "4U7": "A", "56A": "H", + "5AA": "A", "5AB": "A", "5AT": "T", "5BU": "U", "5CG": "G", + "5CM": "C", "5CS": "C", "5FA": "A", "5FC": "C", "5FU": "U", + "5HP": "E", "5HT": "T", "5HU": "U", "5IC": "C", "5IT": "T", + "5IU": "U", "5MC": "C", "5MD": "N", "5MU": "U", "5NC": "C", + "5PC": "C", "5PY": "T", "5SE": "U", "5ZA": "TWG", "64T": "T", + "6CL": "K", "6CT": "T", "6CW": "W", "6HA": "A", "6HC": "C", + "6HG": "G", "6HN": "K", "6HT": "T", "6IA": "A", "6MA": "A", + "6MC": "A", "6MI": "N", "6MT": "A", "6MZ": "N", "6OG": "G", + "70U": "U", "7DA": "A", "7GU": "G", "7JA": "I", "7MG": "G", + "8AN": "A", "8FG": "G", "8MG": "G", "8OG": "G", "9NE": "E", + "9NF": "F", "9NR": "R", "9NV": "V", "A ": "A", "A1P": "N", + "A23": "A", "A2L": "A", "A2M": "A", "A34": "A", "A35": "A", + "A38": "A", "A39": "A", "A3A": "A", "A3P": "A", "A40": "A", + "A43": "A", "A44": "A", "A47": "A", "A5L": "A", "A5M": "C", + "A5N": "N", "A5O": "A", "A66": "X", "AA3": "A", "AA4": "A", + "AAR": "R", "AB7": "X", "ABA": "A", "ABR": "A", "ABS": "A", + "ABT": "N", "ACB": "D", "ACL": "R", "AD2": "A", "ADD": "X", + "ADX": "N", "AEA": "X", "AEI": "D", "AET": "A", "AFA": "N", + "AFF": "N", "AFG": "G", "AGM": "R", "AGT": "C", "AHB": "N", + "AHH": "X", "AHO": "A", "AHP": "A", "AHS": "X", "AHT": "X", + "AIB": "A", "AKL": "D", "AKZ": "D", "ALA": "A", "ALC": "A", + "ALM": "A", "ALN": "A", "ALO": "T", "ALQ": "X", "ALS": "A", + "ALT": "A", "ALV": "A", "ALY": "K", "AN8": "A", "AP7": "A", + "APE": "X", "APH": "A", "API": "K", "APK": "K", "APM": "X", + "APP": "X", "AR2": "R", "AR4": "E", "AR7": "R", "ARG": "R", + "ARM": "R", "ARO": "R", "ARV": "X", "AS ": "A", "AS2": "D", + "AS9": "X", "ASA": "D", "ASB": "D", "ASI": "D", "ASK": "D", + "ASL": "D", "ASM": "X", "ASN": "N", "ASP": "D", "ASQ": "D", + "ASU": "N", "ASX": "B", 
"ATD": "T", "ATL": "T", "ATM": "T", + "AVC": "A", "AVN": "X", "AYA": "A", "AYG": "AYG", "AZK": "K", + "AZS": "S", "AZY": "Y", "B1F": "F", "B1P": "N", "B2A": "A", + "B2F": "F", "B2I": "I", "B2V": "V", "B3A": "A", "B3D": "D", + "B3E": "E", "B3K": "K", "B3L": "X", "B3M": "X", "B3Q": "X", + "B3S": "S", "B3T": "X", "B3U": "H", "B3X": "N", "B3Y": "Y", + "BB6": "C", "BB7": "C", "BB8": "F", "BB9": "C", "BBC": "C", + "BCS": "C", "BE2": "X", "BFD": "D", "BG1": "S", "BGM": "G", + "BH2": "D", "BHD": "D", "BIF": "F", "BIL": "X", "BIU": "I", + "BJH": "X", "BLE": "L", "BLY": "K", "BMP": "N", "BMT": "T", + "BNN": "F", "BNO": "X", "BOE": "T", "BOR": "R", "BPE": "C", + "BRU": "U", "BSE": "S", "BT5": "N", "BTA": "L", "BTC": "C", + "BTR": "W", "BUC": "C", "BUG": "V", "BVP": "U", "BZG": "N", + "C ": "C", "C12": "TYG", "C1X": "K", "C25": "C", "C2L": "C", + "C2S": "C", "C31": "C", "C32": "C", "C34": "C", "C36": "C", + "C37": "C", "C38": "C", "C3Y": "C", "C42": "C", "C43": "C", + "C45": "C", "C46": "C", "C49": "C", "C4R": "C", "C4S": "C", + "C5C": "C", "C66": "X", "C6C": "C", "C99": "TFG", "CAF": "C", + "CAL": "X", "CAR": "C", "CAS": "C", "CAV": "X", "CAY": "C", + "CB2": "C", "CBR": "C", "CBV": "C", "CCC": "C", "CCL": "K", + "CCS": "C", "CCY": "CYG", "CDE": "X", "CDV": "X", "CDW": "C", + "CEA": "C", "CFL": "C", "CFY": "FCYG", "CG1": "G", "CGA": "E", + "CGU": "E", "CH ": "C", "CH6": "MYG", "CH7": "KYG", "CHF": "X", + "CHG": "X", "CHP": "G", "CHS": "X", "CIR": "R", "CJO": "GYG", + "CLE": "L", "CLG": "K", "CLH": "K", "CLV": "AFG", "CM0": "N", + "CME": "C", "CMH": "C", "CML": "C", "CMR": "C", "CMT": "C", + "CNU": "U", "CP1": "C", "CPC": "X", "CPI": "X", "CQR": "GYG", + "CR0": "TLG", "CR2": "GYG", "CR5": "G", "CR7": "KYG", "CR8": "HYG", + "CRF": "TWG", "CRG": "THG", "CRK": "MYG", "CRO": "GYG", "CRQ": "QYG", + "CRU": "EYG", "CRW": "ASG", "CRX": "ASG", "CS0": "C", "CS1": "C", + "CS3": "C", "CS4": "C", "CS8": "N", "CSA": "C", "CSB": "C", + "CSD": "C", "CSE": "C", "CSF": "C", "CSH": "SHG", "CSI": "G", + "CSJ": "C", "CSL": "C", "CSO": "C", "CSP": "C", "CSR": "C", + "CSS": "C", "CSU": "C", "CSW": "C", "CSX": "C", "CSY": "SYG", + "CSZ": "C", "CTE": "W", "CTG": "T", "CTH": "T", "CUC": "X", + "CWR": "S", "CXM": "M", "CY0": "C", "CY1": "C", "CY3": "C", + "CY4": "C", "CYA": "C", "CYD": "C", "CYF": "C", "CYG": "C", + "CYJ": "X", "CYM": "C", "CYQ": "C", "CYR": "C", "CYS": "C", + "CZ2": "C", "CZO": "GYG", "CZZ": "C", "D11": "T", "D1P": "N", + "D3 ": "N", "D33": "N", "D3P": "G", "D3T": "T", "D4M": "T", + "D4P": "X", "DA ": "A", "DA2": "X", "DAB": "A", "DAH": "F", + "DAL": "A", "DAR": "R", "DAS": "D", "DBB": "T", "DBM": "N", + "DBS": "S", "DBU": "T", "DBY": "Y", "DBZ": "A", "DC ": "C", + "DC2": "C", "DCG": "G", "DCI": "X", "DCL": "X", "DCT": "C", + "DCY": "C", "DDE": "H", "DDG": "G", "DDN": "U", "DDX": "N", + "DFC": "C", "DFG": "G", "DFI": "X", "DFO": "X", "DFT": "N", + "DG ": "G", "DGH": "G", "DGI": "G", "DGL": "E", "DGN": "Q", + "DHA": "S", "DHI": "H", "DHL": "X", "DHN": "V", "DHP": "X", + "DHU": "U", "DHV": "V", "DI ": "I", "DIL": "I", "DIR": "R", + "DIV": "V", "DLE": "L", "DLS": "K", "DLY": "K", "DM0": "K", + "DMH": "N", "DMK": "D", "DMT": "X", "DN ": "N", "DNE": "L", + "DNG": "L", "DNL": "K", "DNM": "L", "DNP": "A", "DNR": "C", + "DNS": "K", "DOA": "X", "DOC": "C", "DOH": "D", "DON": "L", + "DPB": "T", "DPH": "F", "DPL": "P", "DPP": "A", "DPQ": "Y", + "DPR": "P", "DPY": "N", "DRM": "U", "DRP": "N", "DRT": "T", + "DRZ": "N", "DSE": "S", "DSG": "N", "DSN": "S", "DSP": "D", + "DT ": "T", "DTH": "T", "DTR": "W", "DTY": "Y", "DU ": 
"U", + "DVA": "V", "DXD": "N", "DXN": "N", "DYG": "DYG", "DYS": "C", + "DZM": "A", "E ": "A", "E1X": "A", "ECC": "Q", "EDA": "A", + "EFC": "C", "EHP": "F", "EIT": "T", "ENP": "N", "ESB": "Y", + "ESC": "M", "EXB": "X", "EXY": "L", "EY5": "N", "EYS": "X", + "F2F": "F", "FA2": "A", "FA5": "N", "FAG": "N", "FAI": "N", + "FB5": "A", "FB6": "A", "FCL": "F", "FFD": "N", "FGA": "E", + "FGL": "G", "FGP": "S", "FHL": "X", "FHO": "K", "FHU": "U", + "FLA": "A", "FLE": "L", "FLT": "Y", "FME": "M", "FMG": "G", + "FMU": "N", "FOE": "C", "FOX": "G", "FP9": "P", "FPA": "F", + "FRD": "X", "FT6": "W", "FTR": "W", "FTY": "Y", "FVA": "V", + "FZN": "K", "G ": "G", "G25": "G", "G2L": "G", "G2S": "G", + "G31": "G", "G32": "G", "G33": "G", "G36": "G", "G38": "G", + "G42": "G", "G46": "G", "G47": "G", "G48": "G", "G49": "G", + "G4P": "N", "G7M": "G", "GAO": "G", "GAU": "E", "GCK": "C", + "GCM": "X", "GDP": "G", "GDR": "G", "GFL": "G", "GGL": "E", + "GH3": "G", "GHG": "Q", "GHP": "G", "GL3": "G", "GLH": "Q", + "GLJ": "E", "GLK": "E", "GLM": "X", "GLN": "Q", "GLQ": "E", + "GLU": "E", "GLX": "Z", "GLY": "G", "GLZ": "G", "GMA": "E", + "GMS": "G", "GMU": "U", "GN7": "G", "GND": "X", "GNE": "N", + "GOM": "G", "GPL": "K", "GS ": "G", "GSC": "G", "GSR": "G", + "GSS": "G", "GSU": "E", "GT9": "C", "GTP": "G", "GVL": "X", + "GYC": "CYG", "GYS": "SYG", "H2U": "U", "H5M": "P", "HAC": "A", + "HAR": "R", "HBN": "H", "HCS": "X", "HDP": "U", "HEU": "U", + "HFA": "X", "HGL": "X", "HHI": "H", "HHK": "AK", "HIA": "H", + "HIC": "H", "HIP": "H", "HIQ": "H", "HIS": "H", "HL2": "L", + "HLU": "L", "HMR": "R", "HOL": "N", "HPC": "F", "HPE": "F", + "HPH": "F", "HPQ": "F", "HQA": "A", "HRG": "R", "HRP": "W", + "HS8": "H", "HS9": "H", "HSE": "S", "HSL": "S", "HSO": "H", + "HTI": "C", "HTN": "N", "HTR": "W", "HV5": "A", "HVA": "V", + "HY3": "P", "HYP": "P", "HZP": "P", "I ": "I", "I2M": "I", + "I58": "K", "I5C": "C", "IAM": "A", "IAR": "R", "IAS": "D", + "IC ": "C", "IEL": "K", "IEY": "HYG", "IG ": "G", "IGL": "G", + "IGU": "G", "IIC": "SHG", "IIL": "I", "ILE": "I", "ILG": "E", + "ILX": "I", "IMC": "C", "IML": "I", "IOY": "F", "IPG": "G", + "IPN": "N", "IRN": "N", "IT1": "K", "IU ": "U", "IYR": "Y", + "IYT": "T", "IZO": "M", "JJJ": "C", "JJK": "C", "JJL": "C", + "JW5": "N", "K1R": "C", "KAG": "G", "KCX": "K", "KGC": "K", + "KNB": "A", "KOR": "M", "KPI": "K", "KST": "K", "KYQ": "K", + "L2A": "X", "LA2": "K", "LAA": "D", "LAL": "A", "LBY": "K", + "LC ": "C", "LCA": "A", "LCC": "N", "LCG": "G", "LCH": "N", + "LCK": "K", "LCX": "K", "LDH": "K", "LED": "L", "LEF": "L", + "LEH": "L", "LEI": "V", "LEM": "L", "LEN": "L", "LET": "X", + "LEU": "L", "LEX": "L", "LG ": "G", "LGP": "G", "LHC": "X", + "LHU": "U", "LKC": "N", "LLP": "K", "LLY": "K", "LME": "E", + "LMF": "K", "LMQ": "Q", "LMS": "N", "LP6": "K", "LPD": "P", + "LPG": "G", "LPL": "X", "LPS": "S", "LSO": "X", "LTA": "X", + "LTR": "W", "LVG": "G", "LVN": "V", "LYF": "K", "LYK": "K", + "LYM": "K", "LYN": "K", "LYR": "K", "LYS": "K", "LYX": "K", + "LYZ": "K", "M0H": "C", "M1G": "G", "M2G": "G", "M2L": "K", + "M2S": "M", "M30": "G", "M3L": "K", "M5M": "C", "MA ": "A", + "MA6": "A", "MA7": "A", "MAA": "A", "MAD": "A", "MAI": "R", + "MBQ": "Y", "MBZ": "N", "MC1": "S", "MCG": "X", "MCL": "K", + "MCS": "C", "MCY": "C", "MD3": "C", "MD6": "G", "MDH": "X", + "MDO": "ASG", "MDR": "N", "MEA": "F", "MED": "M", "MEG": "E", + "MEN": "N", "MEP": "U", "MEQ": "Q", "MET": "M", "MEU": "G", + "MF3": "X", "MFC": "GYG", "MG1": "G", "MGG": "R", "MGN": "Q", + "MGQ": "A", "MGV": "G", "MGY": "G", "MHL": "L", "MHO": "M", + 
"MHS": "H", "MIA": "A", "MIS": "S", "MK8": "L", "ML3": "K", + "MLE": "L", "MLL": "L", "MLY": "K", "MLZ": "K", "MME": "M", + "MMO": "R", "MMT": "T", "MND": "N", "MNL": "L", "MNU": "U", + "MNV": "V", "MOD": "X", "MP8": "P", "MPH": "X", "MPJ": "X", + "MPQ": "G", "MRG": "G", "MSA": "G", "MSE": "M", "MSL": "M", + "MSO": "M", "MSP": "X", "MT2": "M", "MTR": "T", "MTU": "A", + "MTY": "Y", "MVA": "V", "N ": "N", "N10": "S", "N2C": "X", + "N5I": "N", "N5M": "C", "N6G": "G", "N7P": "P", "NA8": "A", + "NAL": "A", "NAM": "A", "NB8": "N", "NBQ": "Y", "NC1": "S", + "NCB": "A", "NCX": "N", "NCY": "X", "NDF": "F", "NDN": "U", + "NEM": "H", "NEP": "H", "NF2": "N", "NFA": "F", "NHL": "E", + "NIT": "X", "NIY": "Y", "NLE": "L", "NLN": "L", "NLO": "L", + "NLP": "L", "NLQ": "Q", "NMC": "G", "NMM": "R", "NMS": "T", + "NMT": "T", "NNH": "R", "NP3": "N", "NPH": "C", "NPI": "A", + "NRP": "LYG", "NRQ": "MYG", "NSK": "X", "NTY": "Y", "NVA": "V", + "NYC": "TWG", "NYG": "NYG", "NYM": "N", "NYS": "C", "NZH": "H", + "O12": "X", "O2C": "N", "O2G": "G", "OAD": "N", "OAS": "S", + "OBF": "X", "OBS": "X", "OCS": "C", "OCY": "C", "ODP": "N", + "OHI": "H", "OHS": "D", "OIC": "X", "OIP": "I", "OLE": "X", + "OLT": "T", "OLZ": "S", "OMC": "C", "OMG": "G", "OMT": "M", + "OMU": "U", "ONE": "U", "ONH": "A", "ONL": "X", "OPR": "R", + "ORN": "A", "ORQ": "R", "OSE": "S", "OTB": "X", "OTH": "T", + "OTY": "Y", "OXX": "D", "P ": "G", "P1L": "C", "P1P": "N", + "P2T": "T", "P2U": "U", "P2Y": "P", "P5P": "A", "PAQ": "Y", + "PAS": "D", "PAT": "W", "PAU": "A", "PBB": "C", "PBF": "F", + "PBT": "N", "PCA": "E", "PCC": "P", "PCE": "X", "PCS": "F", + "PDL": "X", "PDU": "U", "PEC": "C", "PF5": "F", "PFF": "F", + "PFX": "X", "PG1": "S", "PG7": "G", "PG9": "G", "PGL": "X", + "PGN": "G", "PGP": "G", "PGY": "G", "PHA": "F", "PHD": "D", + "PHE": "F", "PHI": "F", "PHL": "F", "PHM": "F", "PIA": "AYG", + "PIV": "X", "PLE": "L", "PM3": "F", "PMT": "C", "POM": "P", + "PPN": "F", "PPU": "A", "PPW": "G", "PQ1": "N", "PR3": "C", + "PR5": "A", "PR9": "P", "PRN": "A", "PRO": "P", "PRS": "P", + "PSA": "F", "PSH": "H", "PST": "T", "PSU": "U", "PSW": "C", + "PTA": "X", "PTH": "Y", "PTM": "Y", "PTR": "Y", "PU ": "A", + "PUY": "N", "PVH": "H", "PVL": "X", "PYA": "A", "PYO": "U", + "PYX": "C", "PYY": "N", "QLG": "QLG", "QMM": "Q", "QPA": "C", + "QPH": "F", "QUO": "G", "R ": "A", "R1A": "C", "R4K": "W", + "RC7": "HYG", "RE0": "W", "RE3": "W", "RIA": "A", "RMP": "A", + "RON": "X", "RT ": "T", "RTP": "N", "S1H": "S", "S2C": "C", + "S2D": "A", "S2M": "T", "S2P": "A", "S4A": "A", "S4C": "C", + "S4G": "G", "S4U": "U", "S6G": "G", "SAC": "S", "SAH": "C", + "SAR": "G", "SBL": "S", "SC ": "C", "SCH": "C", "SCS": "C", + "SCY": "C", "SD2": "X", "SDG": "G", "SDP": "S", "SEB": "S", + "SEC": "A", "SEG": "A", "SEL": "S", "SEM": "S", "SEN": "S", + "SEP": "S", "SER": "S", "SET": "S", "SGB": "S", "SHC": "C", + "SHP": "G", "SHR": "K", "SIB": "C", "SIC": "DC", "SLA": "P", + "SLR": "P", "SLZ": "K", "SMC": "C", "SME": "M", "SMF": "F", + "SMP": "A", "SMT": "T", "SNC": "C", "SNN": "N", "SOC": "C", + "SOS": "N", "SOY": "S", "SPT": "T", "SRA": "A", "SSU": "U", + "STY": "Y", "SUB": "X", "SUI": "DG", "SUN": "S", "SUR": "U", + "SVA": "S", "SVV": "S", "SVW": "S", "SVX": "S", "SVY": "S", + "SVZ": "X", "SWG": "SWG", "SYS": "C", "T ": "T", "T11": "F", + "T23": "T", "T2S": "T", "T2T": "N", "T31": "U", "T32": "T", + "T36": "T", "T37": "T", "T38": "T", "T39": "T", "T3P": "T", + "T41": "T", "T48": "T", "T49": "T", "T4S": "T", "T5O": "U", + "T5S": "T", "T66": "X", "T6A": "A", "TA3": "T", "TA4": "X", + "TAF": 
"T", "TAL": "N", "TAV": "D", "TBG": "V", "TBM": "T", + "TC1": "C", "TCP": "T", "TCQ": "Y", "TCR": "W", "TCY": "A", + "TDD": "L", "TDY": "T", "TFE": "T", "TFO": "A", "TFQ": "F", + "TFT": "T", "TGP": "G", "TH6": "T", "THC": "T", "THO": "X", + "THR": "T", "THX": "N", "THZ": "R", "TIH": "A", "TLB": "N", + "TLC": "T", "TLN": "U", "TMB": "T", "TMD": "T", "TNB": "C", + "TNR": "S", "TOX": "W", "TP1": "T", "TPC": "C", "TPG": "G", + "TPH": "X", "TPL": "W", "TPO": "T", "TPQ": "Y", "TQI": "W", + "TQQ": "W", "TRF": "W", "TRG": "K", "TRN": "W", "TRO": "W", + "TRP": "W", "TRQ": "W", "TRW": "W", "TRX": "W", "TS ": "N", + "TST": "X", "TT ": "N", "TTD": "T", "TTI": "U", "TTM": "T", + "TTQ": "W", "TTS": "Y", "TY1": "Y", "TY2": "Y", "TY3": "Y", + "TY5": "Y", "TYB": "Y", "TYI": "Y", "TYJ": "Y", "TYN": "Y", + "TYO": "Y", "TYQ": "Y", "TYR": "Y", "TYS": "Y", "TYT": "Y", + "TYU": "N", "TYW": "Y", "TYX": "X", "TYY": "Y", "TZB": "X", + "TZO": "X", "U ": "U", "U25": "U", "U2L": "U", "U2N": "U", + "U2P": "U", "U31": "U", "U33": "U", "U34": "U", "U36": "U", + "U37": "U", "U8U": "U", "UAR": "U", "UCL": "U", "UD5": "U", + "UDP": "N", "UFP": "N", "UFR": "U", "UFT": "U", "UMA": "A", + "UMP": "U", "UMS": "U", "UN1": "X", "UN2": "X", "UNK": "X", + "UR3": "U", "URD": "U", "US1": "U", "US2": "U", "US3": "T", + "US5": "U", "USM": "U", "VAD": "V", "VAF": "V", "VAL": "V", + "VB1": "K", "VDL": "X", "VLL": "X", "VLM": "X", "VMS": "X", + "VOL": "X", "WCR": "GYG", "X ": "G", "X2W": "E", "X4A": "N", + "X9Q": "AFG", "XAD": "A", "XAE": "N", "XAL": "A", "XAR": "N", + "XCL": "C", "XCN": "C", "XCP": "X", "XCR": "C", "XCS": "N", + "XCT": "C", "XCY": "C", "XGA": "N", "XGL": "G", "XGR": "G", + "XGU": "G", "XPR": "P", "XSN": "N", "XTH": "T", "XTL": "T", + "XTR": "T", "XTS": "G", "XTY": "N", "XUA": "A", "XUG": "G", + "XX1": "K", "XXY": "THG", "XYG": "DYG", "Y ": "A", "YCM": "C", + "YG ": "G", "YOF": "Y", "YRR": "N", "YYG": "G", "Z ": "C", + "Z01": "A", "ZAD": "A", "ZAL": "A", "ZBC": "C", "ZBU": "U", + "ZCL": "F", "ZCY": "C", "ZDU": "U", "ZFB": "X", "ZGU": "G", + "ZHP": "N", "ZTH": "T", "ZU0": "T", "ZZJ": "A"} diff --git a/code/lib/Bio/Data/__init__.py b/code/lib/Bio/Data/__init__.py new file mode 100644 index 0000000..568286c --- /dev/null +++ b/code/lib/Bio/Data/__init__.py @@ -0,0 +1,8 @@ +# Copyright 2000 Andrew Dalke. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. 
+ +"""Collections of various bits of useful biological data.""" diff --git a/code/lib/Bio/Data/__pycache__/CodonTable.cpython-37.pyc b/code/lib/Bio/Data/__pycache__/CodonTable.cpython-37.pyc new file mode 100644 index 0000000..7566625 Binary files /dev/null and b/code/lib/Bio/Data/__pycache__/CodonTable.cpython-37.pyc differ diff --git a/code/lib/Bio/Data/__pycache__/IUPACData.cpython-37.pyc b/code/lib/Bio/Data/__pycache__/IUPACData.cpython-37.pyc new file mode 100644 index 0000000..f47d34a Binary files /dev/null and b/code/lib/Bio/Data/__pycache__/IUPACData.cpython-37.pyc differ diff --git a/code/lib/Bio/Data/__pycache__/SCOPData.cpython-37.pyc b/code/lib/Bio/Data/__pycache__/SCOPData.cpython-37.pyc new file mode 100644 index 0000000..ce87c43 Binary files /dev/null and b/code/lib/Bio/Data/__pycache__/SCOPData.cpython-37.pyc differ diff --git a/code/lib/Bio/Data/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/Data/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..1ff1640 Binary files /dev/null and b/code/lib/Bio/Data/__pycache__/__init__.cpython-37.pyc differ diff --git a/code/lib/Bio/Emboss/Applications.py b/code/lib/Bio/Emboss/Applications.py new file mode 100644 index 0000000..c3eab72 --- /dev/null +++ b/code/lib/Bio/Emboss/Applications.py @@ -0,0 +1,1221 @@ +# Copyright 2001-2009 Brad Chapman. +# Revisions copyright 2009-2016 by Peter Cock. +# Revisions copyright 2009 by David Winter. +# Revisions copyright 2009-2010 by Leighton Pritchard. +# All rights reserved. +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +"""Code to interact with and run various EMBOSS programs (OBSOLETE). + +These classes follow the AbstractCommandline interfaces for running +programs. + +We have decided to remove this module in future, and instead recommend +building your command and invoking it via the subprocess module directly. +""" + + +from Bio.Application import _Option, _Switch, AbstractCommandline + + +class _EmbossMinimalCommandLine(AbstractCommandline): + """Base Commandline object for EMBOSS wrappers (PRIVATE). + + This is provided for subclassing, it deals with shared options + common to all the EMBOSS tools: + + Attributes: + - auto Turn off prompts + - stdout Write standard output + - filter Read standard input, write standard output + - options Prompt for standard and additional values + - debug Write debug output to program.dbg + - verbose Report some/full command line options + - help Report command line options. More + information on associated and general + qualifiers can be found with -help -verbose + - warning Report warnings + - error Report errors + - fatal Report fatal errors + - die Report dying program messages + + """ + + def __init__(self, cmd=None, **kwargs): + assert cmd is not None + extra_parameters = [ + _Switch( + ["-auto", "auto"], + "Turn off prompts.\n\n" + "Automatic mode disables prompting, so we recommend you set this " + "argument all the time when calling an EMBOSS tool from Biopython.", + ), + _Switch(["-stdout", "stdout"], "Write standard output."), + _Switch( + ["-filter", "filter"], "Read standard input, write standard output." 
+ ), + _Switch( + ["-options", "options"], + "Prompt for standard and additional values.\n\n" + "If you are calling an EMBOSS tool from within Biopython, " + "we DO NOT recommend using this option.", + ), + _Switch(["-debug", "debug"], "Write debug output to program.dbg."), + _Switch(["-verbose", "verbose"], "Report some/full command line options"), + _Switch( + ["-help", "help"], + "Report command line options.\n\n" + "More information on associated and general qualifiers " + "can be found with -help -verbose", + ), + _Switch(["-warning", "warning"], "Report warnings."), + _Switch(["-error", "error"], "Report errors."), + _Switch(["-die", "die"], "Report dying program messages."), + ] + try: + # Insert extra parameters - at the start just in case there + # are any arguments which must come last: + self.parameters = extra_parameters + self.parameters + except AttributeError: + # Should we raise an error? The subclass should have set this up! + self.parameters = extra_parameters + AbstractCommandline.__init__(self, cmd, **kwargs) + + +class _EmbossCommandLine(_EmbossMinimalCommandLine): + """Base Commandline object for EMBOSS wrappers (PRIVATE). + + This is provided for subclassing, it deals with shared options + common to all the EMBOSS tools plus: + + - outfile Output filename + + """ + + def __init__(self, cmd=None, **kwargs): + assert cmd is not None + extra_parameters = [ + _Option(["-outfile", "outfile"], "Output filename", filename=True) + ] + try: + # Insert extra parameters - at the start just in case there + # are any arguments which must come last: + self.parameters = extra_parameters + self.parameters + except AttributeError: + # Should we raise an error? The subclass should have set this up! + self.parameters = extra_parameters + _EmbossMinimalCommandLine.__init__(self, cmd, **kwargs) + + def _validate(self): + # Check the outfile, filter, or stdout option has been set. + # We can't simply do this via the required flag for the outfile + # output - this seems the simplest solution. + if not (self.outfile or self.filter or self.stdout): + raise ValueError( + "You must either set outfile (output filename), " + "or enable filter or stdout (output to stdout)." + ) + return _EmbossMinimalCommandLine._validate(self) + + +class Primer3Commandline(_EmbossCommandLine): + """Commandline object for the Primer3 interface from EMBOSS. + + The precise set of supported arguments depends on your version of EMBOSS. + This version accepts arguments current at EMBOSS 6.1.0: + + >>> cline = Primer3Commandline(sequence="mysequence.fas", auto=True, hybridprobe=True) + >>> cline.explainflag = True + >>> cline.osizeopt=20 + >>> cline.psizeopt=200 + >>> cline.outfile = "myresults.out" + >>> cline.bogusparameter = 1967 # Invalid parameter + Traceback (most recent call last): + ... + ValueError: Option name bogusparameter was not found. + >>> print(cline) + eprimer3 -auto -outfile=myresults.out -sequence=mysequence.fas -hybridprobe=True -psizeopt=200 -osizeopt=20 -explainflag=True + + """ + + def __init__(self, cmd="eprimer3", **kwargs): + """Initialize the class.""" + self.parameters = [ + _Option( + ["-sequence", "sequence"], + "Sequence to choose primers from.", + is_required=True, + ), + _Option(["-task", "task"], "Tell eprimer3 what task to perform."), + _Option( + ["-hybridprobe", "hybridprobe"], + "Find an internal oligo to use as a hyb probe.", + ), + _Option( + ["-numreturn", "numreturn"], "Maximum number of primer pairs to return." 
+ ), + _Option( + ["-includedregion", "includedregion"], + "Subregion of the sequence in which to pick primers.", + ), + _Option(["-target", "target"], "Sequence to target for flanking primers."), + _Option( + ["-excludedregion", "excludedregion"], + "Regions to exclude from primer picking.", + ), + _Option( + ["-forwardinput", "forwardinput"], + "Sequence of a forward primer to check.", + ), + _Option( + ["-reverseinput", "reverseinput"], + "Sequence of a reverse primer to check.", + ), + _Option( + ["-gcclamp", "gcclamp"], + "The required number of Gs and Cs at the 3' of each primer.", + ), + _Option(["-osize", "osize"], "Optimum length of a primer oligo."), + _Option(["-minsize", "minsize"], "Minimum length of a primer oligo."), + _Option(["-maxsize", "maxsize"], "Maximum length of a primer oligo."), + _Option( + ["-otm", "otm"], + "Melting temperature for primer oligo (OBSOLETE).\n\n" + "Option replaced in EMBOSS 6.6.0 by -opttm", + ), + _Option( + ["-opttm", "opttm"], + "Optimum melting temperature for a primer oligo.\n\n" + "Option added in EMBOSS 6.6.0, replacing -otm", + ), + _Option( + ["-mintm", "mintm"], "Minimum melting temperature for a primer oligo." + ), + _Option( + ["-maxtm", "maxtm"], "Maximum melting temperature for a primer oligo." + ), + _Option( + ["-maxdifftm", "maxdifftm"], + "Maximum difference in melting temperatures between " + "forward and reverse primers.", + ), + _Option(["-ogcpercent", "ogcpercent"], "Optimum GC% for a primer."), + _Option(["-mingc", "mingc"], "Minimum GC% for a primer."), + _Option(["-maxgc", "maxgc"], "Maximum GC% for a primer."), + _Option( + ["-saltconc", "saltconc"], "Millimolar salt concentration in the PCR." + ), + _Option( + ["-dnaconc", "dnaconc"], + "Nanomolar concentration of annealing oligos in the PCR.", + ), + _Option( + ["-maxpolyx", "maxpolyx"], + "Maximum allowable mononucleotide repeat length in a primer.", + ), + # Primer length: + _Option(["-psizeopt", "psizeopt"], "Optimum size for the PCR product."), + _Option( + ["-prange", "prange"], "Acceptable range of length for the PCR product." + ), + # Primer temperature: + _Option( + ["-ptmopt", "ptmopt"], + "Optimum melting temperature for the PCR product.", + ), + _Option( + ["-ptmmin", "ptmmin"], + "Minimum allowed melting temperature for the amplicon.", + ), + _Option( + ["-ptmmax", "ptmmax"], + "Maximum allowed melting temperature for the amplicon.", + ), + # Note to self, should be -oexcludedregion not -oexcluderegion + _Option( + ["-oexcludedregion", "oexcludedregion"], + "Do not pick internal oligos in this region.", + ), + _Option(["-oligoinput", "oligoinput"], "Sequence of the internal oligo."), + # Oligo length: + _Option(["-osizeopt", "osizeopt"], "Optimum length of internal oligo."), + _Option(["-ominsize", "ominsize"], "Minimum length of internal oligo."), + _Option(["-omaxsize", "omaxsize"], "Maximum length of internal oligo."), + # Oligo GC temperature: + _Option( + ["-otmopt", "otmopt"], "Optimum melting temperature of internal oligo." + ), + _Option( + ["-otmmin", "otmmin"], "Minimum melting temperature of internal oligo." + ), + _Option( + ["-otmmax", "otmmax"], "Maximum melting temperature of internal oligo." 
+ ), + # Oligo GC percent: + _Option(["-ogcopt", "ogcopt"], "Optimum GC% for internal oligo."), + _Option(["-ogcmin", "ogcmin"], "Minimum GC% for internal oligo."), + _Option(["-ogcmax", "ogcmax"], "Maximum GC% for internal oligo."), + # Oligo salt concentration: + _Option( + ["-osaltconc", "osaltconc"], + "Millimolar concentration of salt in the hybridisation.", + ), + _Option( + ["-odnaconc", "odnaconc"], + "Nanomolar concentration of internal oligo in the hybridisation.", + ), + # Oligo self complementarity + _Option( + ["-oanyself", "oanyself"], + "Maximum allowable alignment score for self-complementarity.", + ), + _Option( + ["-oendself", "oendself"], + "Max 3'-anchored self-complementarity global alignment score.", + ), + _Option( + ["-opolyxmax", "opolyxmax"], + "Maximum length of mononucleotide repeat in internal oligo.", + ), + _Option( + ["-mispriminglibraryfile", "mispriminglibraryfile"], + "File containing library of sequences to avoid amplifying", + ), + _Option( + ["-maxmispriming", "maxmispriming"], + "Maximum allowed similarity of primers to sequences in " + "library specified by -mispriminglibrary", + ), + _Option( + ["-omishybmax", "omishybmax"], + "Maximum alignment score for hybridisation of internal oligo to " + "library specified by -mishyblibraryfile.", + ), + _Option( + ["-mishyblibraryfile", "mishyblibraryfile"], + "Library file of seqs to avoid internal oligo hybridisation.", + ), + _Option( + ["-explainflag", "explainflag"], + "Produce output tags with eprimer3 statistics", + ), + ] + _EmbossCommandLine.__init__(self, cmd, **kwargs) + + +class PrimerSearchCommandline(_EmbossCommandLine): + """Commandline object for the primersearch program from EMBOSS.""" + + def __init__(self, cmd="primersearch", **kwargs): + """Initialize the class.""" + self.parameters = [ + _Option( + ["-seqall", "-sequences", "sequences", "seqall"], + "Sequence to look for the primer pairs in.", + is_required=True, + ), + # When this wrapper was written primersearch used -sequences + # as the argument name. Since at least EMBOSS 5.0 (and + # perhaps earlier) this has been -seqall instead. + _Option( + ["-infile", "-primers", "primers", "infile"], + "File containing the primer pairs to search for.", + filename=True, + is_required=True, + ), + # When this wrapper was written primersearch used -primers + # as the argument name. Since at least EMBOSS 5.0 (and + # perhaps earlier) this has been -infile instead. + _Option( + ["-mismatchpercent", "mismatchpercent"], + "Allowed percentage mismatch (any integer value, default 0).", + is_required=True, + ), + _Option( + ["-snucleotide", "snucleotide"], "Sequences are nucleotide (boolean)" + ), + _Option(["-sprotein", "sprotein"], "Sequences are protein (boolean)"), + ] + _EmbossCommandLine.__init__(self, cmd, **kwargs) + + +class FDNADistCommandline(_EmbossCommandLine): + """Commandline object for the fdnadist program from EMBOSS. + + fdnadist is an EMBOSS wrapper for the PHYLIP program dnadist for + calulating distance matrices from DNA sequence files. + """ + + def __init__(self, cmd="fdnadist", **kwargs): + """Initialize the class.""" + self.parameters = [ + _Option( + ["-sequence", "sequence"], + "seq file to use (phylip)", + filename=True, + is_required=True, + ), + _Option(["-method", "method"], "sub. 
model [f,k,j,l,s]", is_required=True), + _Option(["-gamma", "gamma"], "gamma [g, i,n]"), + _Option( + ["-ncategories", "ncategories"], "number of rate catergories (1-9)" + ), + _Option(["-rate", "rate"], "rate for each category"), + _Option( + ["-categories", "categories"], "File of substitution rate categories" + ), + _Option(["-weights", "weights"], "weights file"), + _Option( + ["-gammacoefficient", "gammacoefficient"], "value for gamma (> 0.001)" + ), + _Option(["-invarfrac", "invarfrac"], "proportoin of invariant sites"), + _Option(["-ttratio", "ttratio"], "ts/tv ratio"), + _Option(["-freqsfrom", "freqsfrom"], "use emprical base freqs"), + _Option(["-basefreq", "basefreq"], "specify basefreqs"), + _Option(["-lower", "lower"], "lower triangle matrix (y/N)"), + ] + _EmbossCommandLine.__init__(self, cmd, **kwargs) + + +class FTreeDistCommandline(_EmbossCommandLine): + """Commandline object for the ftreedist program from EMBOSS. + + ftreedist is an EMBOSS wrapper for the PHYLIP program treedist used for + calulating distance measures between phylogentic trees. + """ + + def __init__(self, cmd="ftreedist", **kwargs): + """Initialize the class.""" + self.parameters = [ + _Option( + ["-intreefile", "intreefile"], + "tree file to score (phylip)", + filename=True, + is_required=True, + ), + _Option(["-dtype", "dtype"], "distance type ([S]ymetric, [b]ranch score)"), + _Option( + ["-pairing", "pairing"], + "tree pairing method ([A]djacent pairs, all [p]ossible pairs)", + ), + _Option(["-style", "style"], "output style - [V]erbose, [f]ill, [s]parse"), + _Option(["-noroot", "noroot"], "treat trees as rooted [N/y]"), + _Option( + ["-outgrno", "outgrno"], + "which taxon to root the trees with (starts from 0)", + ), + ] + _EmbossCommandLine.__init__(self, cmd, **kwargs) + + +class FNeighborCommandline(_EmbossCommandLine): + """Commandline object for the fneighbor program from EMBOSS. + + fneighbor is an EMBOSS wrapper for the PHYLIP program neighbor used for + calulating neighbor-joining or UPGMA trees from distance matrices. + """ + + def __init__(self, cmd="fneighbor", **kwargs): + """Initialize the class.""" + self.parameters = [ + _Option( + ["-datafile", "datafile"], + "dist file to use (phylip)", + filename=True, + is_required=True, + ), + _Option( + ["-matrixtype", "matrixtype"], + "is martrix [S]quare pr [u]pper or [l]ower", + ), + _Option(["-treetype", "treetype"], "nj or UPGMA tree (n/u)"), + _Option(["-outgrno", "outgrno"], "taxon to use as OG"), + _Option(["-jumble", "jumble"], "randommise input order (Y/n)"), + _Option(["-seed", "seed"], "provide a random seed"), + _Option(["-trout", "trout"], "write tree (Y/n)"), + _Option(["-outtreefile", "outtreefile"], "filename for output tree"), + _Option(["-progress", "progress"], "print progress (Y/n)"), + _Option(["-treeprint", "treeprint"], "print tree (Y/n)"), + ] + _EmbossCommandLine.__init__(self, cmd, **kwargs) + + +class FSeqBootCommandline(_EmbossCommandLine): + """Commandline object for the fseqboot program from EMBOSS. + + fseqboot is an EMBOSS wrapper for the PHYLIP program seqboot used to + pseudo-sample alignment files. 
+ """ + + def __init__(self, cmd="fseqboot", **kwargs): + """Initialize the class.""" + self.parameters = [ + _Option( + ["-sequence", "sequence"], + "seq file to sample (phylip)", + filename=True, + is_required=True, + ), + _Option(["-categories", "categories"], "file of input categories"), + _Option(["-weights", "weights"], "weights file"), + _Option(["-test", "test"], "specify operation, default is bootstrap"), + _Option(["-regular", "regular"], "absolute number to resample"), + _Option(["-fracsample", "fracsample"], "fraction to resample"), + _Option( + ["-rewriteformat", "rewriteformat"], + "output format ([P]hylip, [n]exus, [x]ml)", + ), + _Option(["-seqtype", "seqtype"], "output format ([D]na, [p]rotein, [r]na)"), + _Option(["-blocksize", "blocksize"], "block size for bootstrapping"), + _Option(["-reps", "reps"], "how many replicates (defaults to 100)"), + _Option( + ["-justweights", "justweights"], + "what to write out: [D]atasets or just [w]eights", + ), + _Option(["-seed", "seed"], "specify random seed"), + _Option(["-dotdiff", "dotdiff"], "Use dot-differencing? [Y/n]"), + ] + _EmbossCommandLine.__init__(self, cmd, **kwargs) + + +class FDNAParsCommandline(_EmbossCommandLine): + """Commandline object for the fdnapars program from EMBOSS. + + fdnapars is an EMBOSS version of the PHYLIP program dnapars, for + estimating trees from DNA sequences using parsimony. Calling this command + without providing a value for the option "-intreefile" will invoke + "interactive mode" (and as a result fail if called with subprocess) if + "-auto" is not set to true. + """ + + def __init__(self, cmd="fdnapars", **kwargs): + """Initialize the class.""" + self.parameters = [ + _Option( + ["-sequence", "sequence"], + "seq file to use (phylip)", + filename=True, + is_required=True, + ), + _Option(["-intreefile", "intreefile"], "Phylip tree file"), + _Option(["-weights", "weights"], "weights file"), + _Option(["-maxtrees", "maxtrees"], "max trees to save during run"), + _Option(["-thorough", "thorough"], "more thorough search (Y/n)"), + _Option(["-rearrange", "rearrange"], "Rearrange on just 1 best tree (Y/n)"), + _Option( + ["-transversion", "transversion"], "Use transversion parsimony (y/N)" + ), + _Option( + ["-njumble", "njumble"], + "number of times to randomise input order (default is 0)", + ), + _Option(["-seed", "seed"], "provide random seed"), + _Option(["-outgrno", "outgrno"], "Specify outgroup"), + _Option(["-thresh", "thresh"], "Use threshold parsimony (y/N)"), + _Option(["-threshold", "threshold"], "Threshold value"), + _Option(["-trout", "trout"], "Write trees to file (Y/n)"), + _Option(["-outtreefile", "outtreefile"], "filename for output tree"), + _Option(["-dotdiff", "dotdiff"], "Use dot-differencing? [Y/n]"), + ] + _EmbossCommandLine.__init__(self, cmd, **kwargs) + + +class FProtParsCommandline(_EmbossCommandLine): + """Commandline object for the fprotpars program from EMBOSS. + + fprotpars is an EMBOSS version of the PHYLIP program protpars, for + estimating trees from protein sequences using parsimony. Calling this + command without providing a value for the option "-intreefile" will invoke + "interactive mode" (and as a result fail if called with subprocess) if + "-auto" is not set to true. 
+ """ + + def __init__(self, cmd="fprotpars", **kwargs): + """Initialize the class.""" + self.parameters = [ + _Option( + ["-sequence", "sequence"], + "seq file to use (phylip)", + filename=True, + is_required=True, + ), + _Option(["-intreefile", "intreefile"], "Phylip tree file to score"), + _Option( + ["-outtreefile", "outtreefile"], + "phylip tree output file", + filename=True, + is_required=True, + ), + _Option(["-weights", "weights"], "weights file"), + _Option(["-whichcode", "whichcode"], "which genetic code, [U,M,V,F,Y]"), + _Option( + ["-njumble", "njumble"], + "number of times to randomise input order (default is 0)", + ), + _Option(["-seed", "seed"], "provide random seed"), + _Option(["-outgrno", "outgrno"], "Specify outgroup"), + _Option(["-thresh", "thresh"], "Use threshold parsimony (y/N)"), + _Option(["-threshold", "threshold"], "Threshold value"), + _Option(["-trout", "trout"], "Write trees to file (Y/n)"), + _Option(["-dotdiff", "dotdiff"], "Use dot-differencing? [Y/n]"), + ] + _EmbossCommandLine.__init__(self, cmd, **kwargs) + + +class FProtDistCommandline(_EmbossCommandLine): + """Commandline object for the fprotdist program from EMBOSS. + + fprotdist is an EMBOSS wrapper for the PHYLIP program protdist used to + calculate distance matrices from protein sequences. + """ + + def __init__(self, cmd="fprotdist", **kwargs): + """Initialize the class.""" + self.parameters = [ + _Option( + ["-sequence", "sequence"], + "seq file to use (phylip)", + filename=True, + is_required=True, + ), + _Option( + ["-ncategories", "ncategories"], "number of rate categories (1-9)" + ), + _Option(["-rate", "rate"], "rate for each category"), + _Option(["-categories", "categories"], "file of rates"), + _Option(["-weights", "weights"], "weights file"), + _Option(["-method", "method"], "sub. model [j,h,d,k,s,c]"), + _Option(["-gamma", "gamma"], "gamma [g, i,c]"), + _Option( + ["-gammacoefficient", "gammacoefficient"], "value for gamma (> 0.001)" + ), + _Option( + ["-invarcoefficient", "invarcoefficient"], + "float for variation of substitution rate among sites", + ), + _Option(["-aacateg", "aacateg"], "Choose the category to use [G,C,H]"), + _Option(["-whichcode", "whichcode"], "genetic code [c,m,v,f,y]"), + _Option(["-ease", "ease"], "Prob change category (float between 0 and 1)"), + _Option(["-ttratio", "ttratio"], "Transition/transversion ratio (0-1)"), + _Option( + ["-basefreq", "basefreq"], "DNA base frequencies (space separated list)" + ), + ] + _EmbossCommandLine.__init__(self, cmd, **kwargs) + + +class FConsenseCommandline(_EmbossCommandLine): + """Commandline object for the fconsense program from EMBOSS. + + fconsense is an EMBOSS wrapper for the PHYLIP program consense used to + calculate consensus trees. 
+ """ + + def __init__(self, cmd="fconsense", **kwargs): + """Initialize the class.""" + self.parameters = [ + _Option( + ["-intreefile", "intreefile"], + "file with phylip trees to make consensus from", + filename=True, + is_required=True, + ), + _Option(["-method", "method"], "consensus method [s, mr, MRE, ml]"), + _Option( + ["-mlfrac", "mlfrac"], + "cut-off freq for branch to appear in consensus (0.5-1.0)", + ), + _Option(["-root", "root"], "treat trees as rooted (YES, no)"), + _Option(["-outgrno", "outgrno"], "OTU to use as outgroup (starts from 0)"), + _Option(["-trout", "trout"], "treat trees as rooted (YES, no)"), + _Option( + ["-outtreefile", "outtreefile"], "Phylip tree output file (optional)" + ), + ] + _EmbossCommandLine.__init__(self, cmd, **kwargs) + + +class WaterCommandline(_EmbossCommandLine): + """Commandline object for the water program from EMBOSS.""" + + def __init__(self, cmd="water", **kwargs): + """Initialize the class.""" + self.parameters = [ + _Option( + ["-asequence", "asequence"], + "First sequence to align", + filename=True, + is_required=True, + ), + _Option( + ["-bsequence", "bsequence"], + "Second sequence to align", + filename=True, + is_required=True, + ), + _Option(["-gapopen", "gapopen"], "Gap open penalty", is_required=True), + _Option( + ["-gapextend", "gapextend"], "Gap extension penalty", is_required=True + ), + _Option(["-datafile", "datafile"], "Matrix file", filename=True), + _Switch( + ["-nobrief", "nobrief"], "Display extended identity and similarity" + ), + _Switch(["-brief", "brief"], "Display brief identity and similarity"), + _Option( + ["-similarity", "similarity"], "Display percent identity and similarity" + ), + _Option( + ["-snucleotide", "snucleotide"], "Sequences are nucleotide (boolean)" + ), + _Option(["-sprotein", "sprotein"], "Sequences are protein (boolean)"), + _Option( + ["-aformat", "aformat"], + "Display output in a different specified output format", + ), + ] + _EmbossCommandLine.__init__(self, cmd, **kwargs) + + +class NeedleCommandline(_EmbossCommandLine): + """Commandline object for the needle program from EMBOSS.""" + + def __init__(self, cmd="needle", **kwargs): + """Initialize the class.""" + self.parameters = [ + _Option( + ["-asequence", "asequence"], + "First sequence to align", + filename=True, + is_required=True, + ), + _Option( + ["-bsequence", "bsequence"], + "Second sequence to align", + filename=True, + is_required=True, + ), + _Option(["-gapopen", "gapopen"], "Gap open penalty", is_required=True), + _Option( + ["-gapextend", "gapextend"], "Gap extension penalty", is_required=True + ), + _Option(["-datafile", "datafile"], "Matrix file", filename=True), + _Option(["-endweight", "endweight"], "Apply And gap penalties"), + _Option( + ["-endopen", "endopen"], + "The score taken away when an end gap is created.", + ), + _Option( + ["-endextend", "endextend"], + "The score added to the end gap penality for each base or " + "residue in the end gap.", + ), + _Switch( + ["-nobrief", "nobrief"], "Display extended identity and similarity" + ), + _Switch(["-brief", "brief"], "Display brief identity and similarity"), + _Option( + ["-similarity", "similarity"], "Display percent identity and similarity" + ), + _Option( + ["-snucleotide", "snucleotide"], "Sequences are nucleotide (boolean)" + ), + _Option(["-sprotein", "sprotein"], "Sequences are protein (boolean)"), + _Option( + ["-aformat", "aformat"], + "Display output in a different specified output format", + ), + ] + _EmbossCommandLine.__init__(self, cmd, **kwargs) + + 
+class NeedleallCommandline(_EmbossCommandLine): + """Commandline object for the needleall program from EMBOSS.""" + + def __init__(self, cmd="needleall", **kwargs): + """Initialize the class.""" + self.parameters = [ + _Option( + ["-asequence", "asequence"], + "First sequence to align", + filename=True, + is_required=True, + ), + _Option( + ["-bsequence", "bsequence"], + "Second sequence to align", + filename=True, + is_required=True, + ), + _Option(["-gapopen", "gapopen"], "Gap open penalty", is_required=True), + _Option( + ["-gapextend", "gapextend"], "Gap extension penalty", is_required=True + ), + _Option(["-datafile", "datafile"], "Matrix file", filename=True), + _Option( + ["-minscore", "minscore"], + "Exclude alignments with scores below this threshold score.", + ), + _Option(["-errorfile", "errorfile"], "Error file to be written to."), + _Option(["-endweight", "endweight"], "Apply end gap penalties"), + _Option( + ["-endopen", "endopen"], + "The score taken away when an end gap is created.", + ), + _Option( + ["-endextend", "endextend"], + "The score added to the end gap penalty for each base or " + "residue in the end gap.", + ), + _Switch( + ["-nobrief", "nobrief"], "Display extended identity and similarity" + ), + _Switch(["-brief", "brief"], "Display brief identity and similarity"), + _Option( + ["-similarity", "similarity"], "Display percent identity and similarity" + ), + _Option( + ["-snucleotide", "snucleotide"], "Sequences are nucleotide (boolean)" + ), + _Option(["-sprotein", "sprotein"], "Sequences are protein (boolean)"), + _Option( + ["-aformat", "aformat"], + "Display output in a different specified output format", + ), + ] + _EmbossCommandLine.__init__(self, cmd, **kwargs) + + +class StretcherCommandline(_EmbossCommandLine): + """Commandline object for the stretcher program from EMBOSS.""" + + def __init__(self, cmd="stretcher", **kwargs): + """Initialize the class.""" + self.parameters = [ + _Option( + ["-asequence", "asequence"], + "First sequence to align", + filename=True, + is_required=True, + ), + _Option( + ["-bsequence", "bsequence"], + "Second sequence to align", + filename=True, + is_required=True, + ), + _Option( + ["-gapopen", "gapopen"], + "Gap open penalty", + is_required=True, + checker_function=lambda value: isinstance(value, int), + ), + _Option( + ["-gapextend", "gapextend"], + "Gap extension penalty", + is_required=True, + checker_function=lambda value: isinstance(value, int), + ), + _Option(["-datafile", "datafile"], "Matrix file", filename=True), + _Option( + ["-snucleotide", "snucleotide"], "Sequences are nucleotide (boolean)" + ), + _Option(["-sprotein", "sprotein"], "Sequences are protein (boolean)"), + _Option( + ["-aformat", "aformat"], + "Display output in a different specified output format", + ), + ] + _EmbossCommandLine.__init__(self, cmd, **kwargs) + + +class FuzznucCommandline(_EmbossCommandLine): + """Commandline object for the fuzznuc program from EMBOSS.""" + + def __init__(self, cmd="fuzznuc", **kwargs): + """Initialize the class.""" + self.parameters = [ + _Option( + ["-sequence", "sequence"], "Sequence database USA", is_required=True + ), + _Option( + ["-pattern", "pattern"], + "Search pattern, using standard IUPAC one-letter codes", + is_required=True, + ), + _Option(["-pmismatch", "pmismatch"], "Number of mismatches"), + _Option(["-complement", "complement"], "Search complementary strand"), + _Option(["-rformat", "rformat"], "Specify the report format to output in."), + ] + _EmbossCommandLine.__init__(self, cmd, **kwargs) + + 
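As an illustration of the motif-search wrappers, here is a hedged sketch of a fuzznuc run; the sequence and report file names are hypothetical, and executing the built command again assumes EMBOSS is on the PATH:

```python
# Sketch: scan a nucleotide file for an EcoRI-like site on both strands.
# "genome.fasta" and "hits.fuzznuc" are hypothetical placeholders.
from Bio.Emboss.Applications import FuzznucCommandline

fuzznuc_cline = FuzznucCommandline(
    sequence="genome.fasta",  # sequence database USA (required)
    pattern="GAATTC",         # IUPAC search pattern (required)
    pmismatch=0,              # exact matches only
    complement=True,          # also search the complementary strand
    outfile="hits.fuzznuc",   # report file, from the base class
)
print(fuzznuc_cline)
# stdout, stderr = fuzznuc_cline()  # runs fuzznuc via subprocess
```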
+class FuzzproCommandline(_EmbossCommandLine): + """Commandline object for the fuzzpro program from EMBOSS.""" + + def __init__(self, cmd="fuzzpro", **kwargs): + """Initialize the class.""" + self.parameters = [ + _Option( + ["-sequence", "sequence"], "Sequence database USA", is_required=True + ), + _Option( + ["-pattern", "pattern"], + "Search pattern, using standard IUPAC one-letter codes", + is_required=True, + ), + _Option(["-pmismatch", "pmismatch"], "Number of mismatches"), + _Option(["-rformat", "rformat"], "Specify the report format to output in."), + ] + _EmbossCommandLine.__init__(self, cmd, **kwargs) + + +class Est2GenomeCommandline(_EmbossCommandLine): + """Commandline object for the est2genome program from EMBOSS.""" + + def __init__(self, cmd="est2genome", **kwargs): + """Initialize the class.""" + self.parameters = [ + _Option(["-est", "est"], "EST sequence(s)", is_required=True), + _Option(["-genome", "genome"], "Genomic sequence", is_required=True), + _Option(["-match", "match"], "Score for matching two bases"), + _Option(["-mismatch", "mismatch"], "Cost for mismatching two bases"), + _Option( + ["-gappenalty", "gappenalty"], + "Cost for deleting a single base in either sequence, " + "excluding introns", + ), + _Option( + ["-intronpenalty", "intronpenalty"], + "Cost for an intron, independent of length.", + ), + _Option( + ["-splicepenalty", "splicepenalty"], + "Cost for an intron, independent of length " + "and starting/ending on donor-acceptor sites", + ), + _Option( + ["-minscore", "minscore"], + "Exclude alignments with scores below this threshold score.", + ), + _Option( + ["-reverse", "reverse"], "Reverse the orientation of the EST sequence" + ), + _Option(["-splice", "splice"], "Use donor and acceptor splice sites."), + _Option( + ["-mode", "mode"], + "This determines the comparison mode. 
'both', 'forward', or 'reverse'", + ), + _Option( + ["-best", "best"], + "Print all comparisons instead of just the best", + ), + _Option(["-space", "space"], "Space threshold for linear-space recursion."), + _Option(["-shuffle", "shuffle"], "Shuffle"), + _Option(["-seed", "seed"], "Random number seed"), + _Option(["-align", "align"], "Show the alignment."), + _Option(["-width", "width"], "Alignment width"), + ] + _EmbossCommandLine.__init__(self, cmd, **kwargs) + + +class ETandemCommandline(_EmbossCommandLine): + """Commandline object for the etandem program from EMBOSS.""" + + def __init__(self, cmd="etandem", **kwargs): + """Initialize the class.""" + self.parameters = [ + _Option( + ["-sequence", "sequence"], "Sequence", filename=True, is_required=True + ), + _Option( + ["-minrepeat", "minrepeat"], "Minimum repeat size", is_required=True + ), + _Option( + ["-maxrepeat", "maxrepeat"], "Maximum repeat size", is_required=True + ), + _Option(["-threshold", "threshold"], "Threshold score"), + _Option(["-mismatch", "mismatch"], "Allow N as a mismatch"), + _Option(["-uniform", "uniform"], "Allow uniform consensus"), + _Option(["-rformat", "rformat"], "Output report format"), + ] + _EmbossCommandLine.__init__(self, cmd, **kwargs) + + +class EInvertedCommandline(_EmbossCommandLine): + """Commandline object for the einverted program from EMBOSS.""" + + def __init__(self, cmd="einverted", **kwargs): + """Initialize the class.""" + self.parameters = [ + _Option( + ["-sequence", "sequence"], "Sequence", filename=True, is_required=True + ), + _Option(["-gap", "gap"], "Gap penalty", is_required=True), + _Option( + ["-threshold", "threshold"], "Minimum score threshold", is_required=True + ), + _Option(["-match", "match"], "Match score", is_required=True), + _Option(["-mismatch", "mismatch"], "Mismatch score", is_required=True), + _Option( + ["-maxrepeat", "maxrepeat"], + "Maximum separation between the start and end of repeat", + ), + ] + _EmbossCommandLine.__init__(self, cmd, **kwargs) + + +class PalindromeCommandline(_EmbossCommandLine): + """Commandline object for the palindrome program from EMBOSS.""" + + def __init__(self, cmd="palindrome", **kwargs): + """Initialize the class.""" + self.parameters = [ + _Option( + ["-sequence", "sequence"], "Sequence", filename=True, is_required=True + ), + _Option( + ["-minpallen", "minpallen"], + "Minimum palindrome length", + is_required=True, + ), + _Option( + ["-maxpallen", "maxpallen"], + "Maximum palindrome length", + is_required=True, + ), + _Option( + ["-gaplimit", "gaplimit"], + "Maximum gap between repeats", + is_required=True, + ), + _Option( + ["-nummismatches", "nummismatches"], + "Number of mismatches allowed", + is_required=True, + ), + _Option( + ["-overlap", "overlap"], "Report overlapping matches", is_required=True + ), + ] + _EmbossCommandLine.__init__(self, cmd, **kwargs) + + +class TranalignCommandline(_EmbossCommandLine): + """Commandline object for the tranalign program from EMBOSS.""" + + def __init__(self, cmd="tranalign", **kwargs): + """Initialize the class.""" + self.parameters = [ + _Option( + ["-asequence", "asequence"], + "Nucleotide sequences to be aligned.", + filename=True, + is_required=True, + ), + _Option( + ["-bsequence", "bsequence"], + "Protein sequence alignment", + filename=True, + is_required=True, + ), + _Option( + ["-outseq", "outseq"], + "Output sequence file.", + filename=True, + is_required=True, + ), + _Option(["-table", "table"], "Code to use"), + ] + _EmbossCommandLine.__init__(self, cmd, **kwargs) + 
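To show how the est2genome wrapper defined above is meant to be used, a small sketch follows; all file names are hypothetical and the scoring values are illustrative, not recommendations:

```python
# Sketch: splice-aware alignment of an EST against genomic DNA.
# File names below are hypothetical placeholders.
from Bio.Emboss.Applications import Est2GenomeCommandline

e2g_cline = Est2GenomeCommandline(
    est="est.fasta",           # EST sequence(s) (required)
    genome="genomic.fasta",    # genomic sequence (required)
    match=1,                   # illustrative match score
    mismatch=1,                # illustrative mismatch cost
    align=True,                # include the alignment in the report
    outfile="est2genome.txt",  # -outfile from the base class
)
print(e2g_cline)
# stdout, stderr = e2g_cline()  # requires a local EMBOSS install
```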
+ +class DiffseqCommandline(_EmbossCommandLine): + """Commandline object for the diffseq program from EMBOSS.""" + + def __init__(self, cmd="diffseq", **kwargs): + """Initialize the class.""" + self.parameters = [ + _Option( + ["-asequence", "asequence"], + "First sequence to compare", + filename=True, + is_required=True, + ), + _Option( + ["-bsequence", "bsequence"], + "Second sequence to compare", + filename=True, + is_required=True, + ), + _Option( + ["-wordsize", "wordsize"], + "Word size to use for comparisons (default 10)", + is_required=True, + ), + _Option( + ["-aoutfeat", "aoutfeat"], + "File for output of first sequence's features", + filename=True, + is_required=True, + ), + _Option( + ["-boutfeat", "boutfeat"], + "File for output of second sequence's features", + filename=True, + is_required=True, + ), + _Option(["-rformat", "rformat"], "Output report file format"), + ] + _EmbossCommandLine.__init__(self, cmd, **kwargs) + + +class IepCommandline(_EmbossCommandLine): + """Commandline for EMBOSS iep: calculates isoelectric point and charge. + + Examples + -------- + >>> from Bio.Emboss.Applications import IepCommandline + >>> iep_cline = IepCommandline(sequence="proteins.faa", + ... outfile="proteins.txt") + >>> print(iep_cline) + iep -outfile=proteins.txt -sequence=proteins.faa + + You would typically run the command line with iep_cline() or via the + Python subprocess module, as described in the Biopython tutorial. + + """ + + def __init__(self, cmd="iep", **kwargs): + """Initialize the class.""" + self.parameters = [ + _Option( + ["-sequence", "sequence"], + "Protein sequence(s) filename", + filename=True, + is_required=True, + ), + _Option( + ["-amino", "amino"], + """Number of N-termini + + Integer 0 (default) or more. + """, + ), + _Option( + ["-carboxyl", "carboxyl"], + """Number of C-termini + + Integer 0 (default) or more. + """, + ), + _Option( + ["-lysinemodified", "lysinemodified"], + """Number of modified lysines + + Integer 0 (default) or more. + """, + ), + _Option( + ["-disulphides", "disulphides"], + """Number of disulphide bridges + + Integer 0 (default) or more. + """, + ), + # Should we implement the -termini switch as well? + _Option( + ["-notermini", "notermini"], + "Exclude (True) or include (False) charge at N and C terminus.", + ), + ] + _EmbossCommandLine.__init__(self, cmd, **kwargs) + + +# seqret uses -outseq, not -outfile, so use the base class: +class SeqretCommandline(_EmbossMinimalCommandLine): + """Commandline object for the seqret program from EMBOSS. + + This tool allows you to interconvert between different sequence file + formats (e.g. GenBank to FASTA). Combining Biopython's Bio.SeqIO module + with seqret using a suitable intermediate file format can allow you to + read/write to an even wider range of file formats. + + This wrapper currently only supports the core functionality; things like + feature tables (in EMBOSS 6.1.0 onwards) are not yet included. + """ + + def __init__(self, cmd="seqret", **kwargs): + """Initialize the class.""" + self.parameters = [ + _Option( + ["-sequence", "sequence"], "Input sequence(s) filename", filename=True + ), + _Option(["-outseq", "outseq"], "Output sequence file.", filename=True), + _Option( + ["-sformat", "sformat"], + "Input sequence(s) format (e.g. fasta, genbank)", + ), + _Option( + ["-osformat", "osformat"], + "Output sequence(s) format (e.g. 
fasta, genbank)", + ), + ] + _EmbossMinimalCommandLine.__init__(self, cmd, **kwargs) + + def _validate(self): + # Check the outseq, filter, or stdout option has been set. + # We can't simply do this via the required flag for the outseq + # output - this seems the simplest solution. + if not (self.outseq or self.filter or self.stdout): + raise ValueError( + "You must either set outseq (output filename), " + "or enable filter or stdout (output to stdout)." + ) + if not (self.sequence or self.filter): + raise ValueError( + "You must either set sequence (input filename), " + "or enable filter (input from stdin)." + ) + return _EmbossMinimalCommandLine._validate(self) + + +class SeqmatchallCommandline(_EmbossCommandLine): + """Commandline object for the seqmatchall program from EMBOSS. + + e.g. + >>> cline = SeqmatchallCommandline(sequence="opuntia.fasta", outfile="opuntia.txt") + >>> cline.auto = True + >>> cline.wordsize = 18 + >>> cline.aformat = "pair" + >>> print(cline) + seqmatchall -auto -outfile=opuntia.txt -sequence=opuntia.fasta -wordsize=18 -aformat=pair + + """ + + def __init__(self, cmd="seqmatchall", **kwargs): + """Initialize the class.""" + self.parameters = [ + _Option( + ["-sequence", "sequence"], + "Readable set of sequences", + filename=True, + is_required=True, + ), + _Option( + ["-wordsize", "wordsize"], "Word size (Integer 2 or more, default 4)" + ), + _Option( + ["-aformat", "aformat"], + "Display output in a different specified output format", + ), + ] + _EmbossCommandLine.__init__(self, cmd, **kwargs) + + +if __name__ == "__main__": + from Bio._utils import run_doctest + + run_doctest() diff --git a/code/lib/Bio/Emboss/Primer3.py b/code/lib/Bio/Emboss/Primer3.py new file mode 100644 index 0000000..0e210a5 --- /dev/null +++ b/code/lib/Bio/Emboss/Primer3.py @@ -0,0 +1,183 @@ +# Copyright 2008 Michiel de Hoon. +# Revisions copyright 2009 Leighton Pritchard. +# Revisions copyright 2010 Peter Cock. +# All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +"""Code to parse output from the EMBOSS eprimer3 program. + +As elsewhere in Biopython there are two input functions, read and parse, +for single record output and multi-record output. For primer3, a single +record object is created for each target sequence and may contain +multiple primers. + +i.e. If you ran eprimer3 with a single target sequence, use the read +function. If you ran eprimer3 with multiple targets, use the parse +function to iterate over the results. +""" + + +# --- primer3 + + +class Record: + """Represent information from a primer3 run finding primers. + + Members: + + - primers - list of Primer objects describing primer pairs for + this target sequence. + - comments - the comment line(s) for the record + + """ + + def __init__(self): + """Initialize the class.""" + self.comments = "" + self.primers = [] + + +class Primers: + """A primer set designed by Primer3. 
+ + Members: + + - size - length of product, note you can use len(primer) as an + alternative to primer.size + + - forward_seq + - forward_start + - forward_length + - forward_tm + - forward_gc + + - reverse_seq + - reverse_start + - reverse_length + - reverse_tm + - reverse_gc + + - internal_seq + - internal_start + - internal_length + - internal_tm + - internal_gc + + """ + + def __init__(self): + """Initialize the class.""" + self.size = 0 + self.forward_seq = "" + self.forward_start = 0 + self.forward_length = 0 + self.forward_tm = 0.0 + self.forward_gc = 0.0 + self.reverse_seq = "" + self.reverse_start = 0 + self.reverse_length = 0 + self.reverse_tm = 0.0 + self.reverse_gc = 0.0 + self.internal_seq = "" + self.internal_start = 0 + self.internal_length = 0 + self.internal_tm = 0.0 + self.internal_gc = 0.0 + + def __len__(self): + """Length of the primer product (i.e. product size).""" + return self.size + + +def parse(handle): + """Iterate over primer3 output as Bio.Emboss.Primer3.Record objects.""" + # Skip blank lines at head of file + while True: + line = handle.readline() + if line.strip(): + break # Starting a record + + # Read each record + record = None + primer = None + while True: + if line.startswith("# EPRIMER3") or line.startswith("# PRIMER3"): + # Record data + if record is not None: + yield record + record = Record() + record.comments += line + primer = None + elif line.startswith("#"): + if ( + line.strip() + != "# Start Len Tm GC% Sequence" + ): + record.comments += line + elif not line.strip(): + pass + elif line[5:19] == "PRODUCT SIZE: ": + primer = Primers() + primer.size = int(line[19:]) + record.primers.append(primer) + elif line[5:19] == "FORWARD PRIMER": + words = line.split() + if not primer or primer.size == 0: + primer = Primers() + record.primers.append(primer) + primer.forward_start = int(words[2]) + primer.forward_length = int(words[3]) + primer.forward_tm = float(words[4]) + primer.forward_gc = float(words[5]) + primer.forward_seq = words[6] + elif line[5:19] == "REVERSE PRIMER": + words = line.split() + if not primer or primer.size == 0: + primer = Primers() + record.primers.append(primer) + primer.reverse_start = int(words[2]) + primer.reverse_length = int(words[3]) + primer.reverse_tm = float(words[4]) + primer.reverse_gc = float(words[5]) + primer.reverse_seq = words[6] + elif line[5:19] == "INTERNAL OLIGO": + words = line.split() + if not primer or primer.size == 0: + primer = Primers() + record.primers.append(primer) + primer.internal_start = int(words[2]) + primer.internal_length = int(words[3]) + primer.internal_tm = float(words[4]) + primer.internal_gc = float(words[5]) + try: + primer.internal_seq = words[6] + except IndexError: # eprimer3 reports oligo without sequence + primer.internal_seq = "" + try: + line = next(handle) + except StopIteration: + break + if record: + yield record + + +def read(handle): + """Parse primer3 output into a Bio.Emboss.Primer3.Record object. + + This is for when there is one and only one target sequence. If + designing primers for multiple sequences, use the parse function. 
+ """ + iterator = parse(handle) + try: + record = next(iterator) + except StopIteration: + raise ValueError("No records found in handle") from None + try: + next(iterator) + raise ValueError("More than one record found in handle") + except StopIteration: + pass + return record diff --git a/code/lib/Bio/Emboss/PrimerSearch.py b/code/lib/Bio/Emboss/PrimerSearch.py new file mode 100644 index 0000000..3a7fb7a --- /dev/null +++ b/code/lib/Bio/Emboss/PrimerSearch.py @@ -0,0 +1,80 @@ +# Copyright 2008 Michiel de Hoon. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. + +"""Code to interact with the primersearch program from EMBOSS.""" + + +class InputRecord: + """Represent the input file into the primersearch program. + + This makes it easy to add primer information and write it out to the + simple primer file format. + """ + + def __init__(self): + """Initialize the class.""" + self.primer_info = [] + + def __str__(self): + """Summarize the primersearch input record as a string.""" + output = "" + for name, primer1, primer2 in self.primer_info: + output += "%s %s %s\n" % (name, primer1, primer2) + return output + + def add_primer_set(self, primer_name, first_primer_seq, second_primer_seq): + """Add primer information to the record.""" + self.primer_info.append((primer_name, first_primer_seq, second_primer_seq)) + + +class OutputRecord: + """Represent the information from a primersearch job. + + amplifiers is a dictionary where the keys are the primer names and + the values are a list of PrimerSearchAmplifier objects. + """ + + def __init__(self): + """Initialize the class.""" + self.amplifiers = {} + + +class Amplifier: + """Represent a single amplification from a primer.""" + + def __init__(self): + """Initialize the class.""" + self.hit_info = "" + self.length = 0 + + +def read(handle): + """Get output from primersearch into a PrimerSearchOutputRecord.""" + record = OutputRecord() + + for line in handle: + if not line.strip(): + continue + elif line.startswith("Primer name"): + name = line.split()[-1] + record.amplifiers[name] = [] + elif line.startswith("Amplimer"): + amplifier = Amplifier() + record.amplifiers[name].append(amplifier) + elif line.startswith("\tSequence: "): + amplifier.hit_info = line.replace("\tSequence: ", "") + elif line.startswith("\tAmplimer length: "): + length = line.split()[-2] + amplifier.length = int(length) + else: + amplifier.hit_info += line + + for name in record.amplifiers: + for amplifier in record.amplifiers[name]: + amplifier.hit_info = amplifier.hit_info.rstrip() + + return record diff --git a/code/lib/Bio/Emboss/__init__.py b/code/lib/Bio/Emboss/__init__.py new file mode 100644 index 0000000..630780d --- /dev/null +++ b/code/lib/Bio/Emboss/__init__.py @@ -0,0 +1,8 @@ +# Copyright 2001 Brad Chapman. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. 
+ +"""Code to interact with the ever-so-useful EMBOSS programs.""" diff --git a/code/lib/Bio/Emboss/__pycache__/Applications.cpython-37.pyc b/code/lib/Bio/Emboss/__pycache__/Applications.cpython-37.pyc new file mode 100644 index 0000000..56718f6 Binary files /dev/null and b/code/lib/Bio/Emboss/__pycache__/Applications.cpython-37.pyc differ diff --git a/code/lib/Bio/Emboss/__pycache__/Primer3.cpython-37.pyc b/code/lib/Bio/Emboss/__pycache__/Primer3.cpython-37.pyc new file mode 100644 index 0000000..6302571 Binary files /dev/null and b/code/lib/Bio/Emboss/__pycache__/Primer3.cpython-37.pyc differ diff --git a/code/lib/Bio/Emboss/__pycache__/PrimerSearch.cpython-37.pyc b/code/lib/Bio/Emboss/__pycache__/PrimerSearch.cpython-37.pyc new file mode 100644 index 0000000..808a8cb Binary files /dev/null and b/code/lib/Bio/Emboss/__pycache__/PrimerSearch.cpython-37.pyc differ diff --git a/code/lib/Bio/Emboss/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/Emboss/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..3515132 Binary files /dev/null and b/code/lib/Bio/Emboss/__pycache__/__init__.cpython-37.pyc differ diff --git a/code/lib/Bio/Entrez/DTDs/Docsum_3_0.dtd b/code/lib/Bio/Entrez/DTDs/Docsum_3_0.dtd new file mode 100644 index 0000000..43a18d0 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/Docsum_3_0.dtd @@ -0,0 +1,17 @@ + + + + + +%NCBI_Entity_module; + + +%Docsum_3_0_module; diff --git a/code/lib/Bio/Entrez/DTDs/Docsum_3_0.mod.dtd b/code/lib/Bio/Entrez/DTDs/Docsum_3_0.mod.dtd new file mode 100644 index 0000000..64a4549 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/Docsum_3_0.mod.dtd @@ -0,0 +1,1054 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/Docsum_3_1.dtd b/code/lib/Bio/Entrez/DTDs/Docsum_3_1.dtd new file mode 100644 index 0000000..a82d6a8 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/Docsum_3_1.dtd @@ -0,0 +1,17 @@ + + + + + +%NCBI_Entity_module; + + +%Docsum_3_1_module; diff --git a/code/lib/Bio/Entrez/DTDs/Docsum_3_1.mod.dtd b/code/lib/Bio/Entrez/DTDs/Docsum_3_1.mod.dtd new file mode 100644 index 0000000..ce57767 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/Docsum_3_1.mod.dtd @@ -0,0 +1,1055 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
diff --git a/code/lib/Bio/Entrez/DTDs/Docsum_3_2.dtd b/code/lib/Bio/Entrez/DTDs/Docsum_3_2.dtd new file mode 100644 index 0000000..0c04c5e
diff --git a/code/lib/Bio/Entrez/DTDs/Docsum_3_2.mod.dtd b/code/lib/Bio/Entrez/DTDs/Docsum_3_2.mod.dtd new file mode 100644 index 0000000..fd17c81
diff --git a/code/lib/Bio/Entrez/DTDs/Docsum_3_3.dtd b/code/lib/Bio/Entrez/DTDs/Docsum_3_3.dtd new file mode 100644 index 0000000..36da5a0
diff --git a/code/lib/Bio/Entrez/DTDs/Docsum_3_3.mod.dtd b/code/lib/Bio/Entrez/DTDs/Docsum_3_3.mod.dtd new file mode 100644 index 0000000..c1c1169
diff --git a/code/lib/Bio/Entrez/DTDs/Docsum_3_4.dtd b/code/lib/Bio/Entrez/DTDs/Docsum_3_4.dtd new file mode 100644 index 0000000..89ba0bc
diff --git a/code/lib/Bio/Entrez/DTDs/Docsum_3_4.mod.dtd b/code/lib/Bio/Entrez/DTDs/Docsum_3_4.mod.dtd new file mode 100644 index 0000000..51899a8
diff --git a/code/lib/Bio/Entrez/DTDs/EMBL_General.dtd b/code/lib/Bio/Entrez/DTDs/EMBL_General.dtd new file mode 100644 index 0000000..267f9e1
diff --git a/code/lib/Bio/Entrez/DTDs/EMBL_General.mod.dtd b/code/lib/Bio/Entrez/DTDs/EMBL_General.mod.dtd new file mode 100644 index 0000000..1f6f6fa
diff --git a/code/lib/Bio/Entrez/DTDs/GenBank_General.dtd b/code/lib/Bio/Entrez/DTDs/GenBank_General.dtd new file mode 100644 index 0000000..c8707a9
diff --git a/code/lib/Bio/Entrez/DTDs/GenBank_General.mod.dtd b/code/lib/Bio/Entrez/DTDs/GenBank_General.mod.dtd new file mode 100644 index 0000000..0cba454
diff --git a/code/lib/Bio/Entrez/DTDs/HomoloGene.dtd b/code/lib/Bio/Entrez/DTDs/HomoloGene.dtd new file mode 100644 index 0000000..82262e4
diff --git a/code/lib/Bio/Entrez/DTDs/HomoloGene.mod.dtd b/code/lib/Bio/Entrez/DTDs/HomoloGene.mod.dtd new file mode 100644 index 0000000..c88a5cd
diff --git a/code/lib/Bio/Entrez/DTDs/INSD_INSDSeq.dtd b/code/lib/Bio/Entrez/DTDs/INSD_INSDSeq.dtd new file mode 100644 index 0000000..fdf3b96
diff --git a/code/lib/Bio/Entrez/DTDs/INSD_INSDSeq.mod.dtd b/code/lib/Bio/Entrez/DTDs/INSD_INSDSeq.mod.dtd new file mode 100644 index 0000000..308423d
diff --git a/code/lib/Bio/Entrez/DTDs/MMDB.dtd b/code/lib/Bio/Entrez/DTDs/MMDB.dtd new file mode 100644 index 0000000..738efa9
diff --git a/code/lib/Bio/Entrez/DTDs/MMDB.mod.dtd b/code/lib/Bio/Entrez/DTDs/MMDB.mod.dtd new file mode 100644 index 0000000..8424533
diff --git a/code/lib/Bio/Entrez/DTDs/MMDB_Chemical_graph.dtd b/code/lib/Bio/Entrez/DTDs/MMDB_Chemical_graph.dtd new file mode 100644 index 0000000..fd56bf7
diff --git a/code/lib/Bio/Entrez/DTDs/MMDB_Chemical_graph.mod.dtd b/code/lib/Bio/Entrez/DTDs/MMDB_Chemical_graph.mod.dtd new file mode 100644 index 0000000..5763354
diff --git a/code/lib/Bio/Entrez/DTDs/MMDB_Features.dtd b/code/lib/Bio/Entrez/DTDs/MMDB_Features.dtd new file mode 100644 index 0000000..b8eb295
diff --git a/code/lib/Bio/Entrez/DTDs/MMDB_Features.mod.dtd b/code/lib/Bio/Entrez/DTDs/MMDB_Features.mod.dtd new file mode 100644 index 0000000..160fb02
diff --git a/code/lib/Bio/Entrez/DTDs/MMDB_Structural_model.dtd b/code/lib/Bio/Entrez/DTDs/MMDB_Structural_model.dtd new file mode 100644 index 0000000..a5a075e
diff --git a/code/lib/Bio/Entrez/DTDs/MMDB_Structural_model.mod.dtd b/code/lib/Bio/Entrez/DTDs/MMDB_Structural_model.mod.dtd new file mode 100644 index 0000000..aa9f16c
diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_Access.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_Access.dtd new file mode 100644 index 0000000..5dcedf0
diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_Access.mod.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_Access.mod.dtd new file mode 100644 index 0000000..e83ad2a
diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_Biblio.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_Biblio.dtd new file mode 100644 index 0000000..17edc09
diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_Biblio.mod.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_Biblio.mod.dtd new file mode 100644 index 0000000..e0b7e1d
diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_BioSource.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_BioSource.dtd new file mode 100644 index 0000000..2bfea5c
diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_BioSource.mod.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_BioSource.mod.dtd new file mode 100644 index 0000000..6b5c5da
diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_BioTree.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_BioTree.dtd new file mode 100644 index 0000000..26ba5d9
diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_BioTree.mod.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_BioTree.mod.dtd new file mode 100644 index 0000000..7a2ad1a
diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_Blast4.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_Blast4.dtd new file mode 100644 index 0000000..24437c5
diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_Blast4.mod.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_Blast4.mod.dtd new file mode 100644 index 0000000..f001a47
diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_BlastDL.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_BlastDL.dtd new file mode 100644 index 0000000..451e782
diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_BlastDL.mod.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_BlastDL.mod.dtd new file mode 100644 index 0000000..78a99bb
diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_BlastOutput.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_BlastOutput.dtd new file mode 100644 index 0000000..307176a
diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_BlastOutput.mod.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_BlastOutput.mod.dtd new file mode 100644 index 0000000..7b0f47f
diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_Cdd.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_Cdd.dtd new file mode 100644 index 0000000..b2d06ad
diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_Cdd.mod.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_Cdd.mod.dtd new file mode 100644 index 0000000..7cf68d6
diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_Cn3d.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_Cn3d.dtd new file mode 100644 index 0000000..9558045
diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_Cn3d.mod.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_Cn3d.mod.dtd new file mode 100644 index 0000000..16a815f
diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_Entity.mod.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_Entity.mod.dtd new file mode 100644 index 0000000..3919c3e
diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_Entrez2.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_Entrez2.dtd new file mode 100644 index 0000000..a08a907
diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_Entrez2.mod.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_Entrez2.mod.dtd new file mode 100644 index 0000000..5eb72c9
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_Entrezgene.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_Entrezgene.dtd new file mode 100644 index 0000000..36e206f --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/NCBI_Entrezgene.dtd @@ -0,0 +1,89 @@ + + + + + +%NCBI_Entity_module; + + +%EMBL_General_module; + + +%GenBank_General_module; + + +%NCBI_Biblio_module; + + +%NCBI_BioSource_module; + + +%NCBI_Entrezgene_module; + + +%NCBI_Gene_module; + + +%NCBI_General_module; + + +%NCBI_Medline_module; + + +%NCBI_Organism_module; + + +%NCBI_Protein_module; + + +%NCBI_Pub_module; + + +%NCBI_RNA_module; + + +%NCBI_Rsite_module; + + +%NCBI_SeqTable_module; + + +%NCBI_Seqalign_module; + + +%NCBI_Seqfeat_module; + + +%NCBI_Seqloc_module; + + +%NCBI_Seqres_module; + + +%NCBI_Sequence_module; + + +%NCBI_TxInit_module; + + +%NCBI_Variation_module; + + +%PDB_General_module; + + +%PIR_General_module; + + +%PRF_General_module; + + +%SP_General_module; diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_Entrezgene.mod.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_Entrezgene.mod.dtd new file mode 100644 index 0000000..c75d32f --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/NCBI_Entrezgene.mod.dtd @@ -0,0 +1,394 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_FeatDef.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_FeatDef.dtd new file mode 100644 index 0000000..a3ce559 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/NCBI_FeatDef.dtd @@ -0,0 +1,17 @@ + + + + + +%NCBI_Entity_module; + + +%NCBI_FeatDef_module; diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_FeatDef.mod.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_FeatDef.mod.dtd new file mode 100644 index 0000000..65fbf90 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/NCBI_FeatDef.mod.dtd @@ -0,0 +1,97 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_GBSeq.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_GBSeq.dtd new file mode 100644 index 0000000..d317e96 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/NCBI_GBSeq.dtd @@ -0,0 +1,17 @@ + + + + + +%NCBI_Entity_module; + + +%NCBI_GBSeq_module; diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_GBSeq.mod.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_GBSeq.mod.dtd new file mode 100644 index 0000000..95be4f3 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/NCBI_GBSeq.mod.dtd @@ -0,0 +1,407 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_Gene.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_Gene.dtd new file mode 100644 index 0000000..cd6d122 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/NCBI_Gene.dtd @@ -0,0 +1,20 @@ + + + + + +%NCBI_Entity_module; + + +%NCBI_Gene_module; + + +%NCBI_General_module; diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_Gene.mod.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_Gene.mod.dtd new file mode 100644 index 0000000..be703af --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/NCBI_Gene.mod.dtd @@ -0,0 +1,97 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_General.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_General.dtd new file mode 100644 index 0000000..a8bb6c1 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/NCBI_General.dtd @@ -0,0 +1,17 @@ + + + + + +%NCBI_Entity_module; + + +%NCBI_General_module; diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_General.mod.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_General.mod.dtd new file mode 100644 index 0000000..c573ca5 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/NCBI_General.mod.dtd @@ -0,0 +1,333 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_ID1Access.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_ID1Access.dtd new file mode 100644 index 0000000..8e57ced --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/NCBI_ID1Access.dtd @@ -0,0 +1,92 @@ + + + + + +%NCBI_Entity_module; + + +%EMBL_General_module; + + +%GenBank_General_module; + + +%NCBI_Biblio_module; + + +%NCBI_BioSource_module; + + +%NCBI_Gene_module; + + +%NCBI_General_module; + + +%NCBI_ID1Access_module; + + +%NCBI_Medline_module; + + +%NCBI_Organism_module; + + +%NCBI_Protein_module; + + +%NCBI_Pub_module; + + +%NCBI_RNA_module; + + +%NCBI_Rsite_module; + + +%NCBI_SeqTable_module; + + +%NCBI_Seqalign_module; + + +%NCBI_Seqfeat_module; + + +%NCBI_Seqloc_module; + + +%NCBI_Seqres_module; + + +%NCBI_Seqset_module; + + +%NCBI_Sequence_module; + + +%NCBI_TxInit_module; + + +%NCBI_Variation_module; + + +%PDB_General_module; + + +%PIR_General_module; + + +%PRF_General_module; + + +%SP_General_module; diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_ID1Access.mod.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_ID1Access.mod.dtd new file mode 100644 index 0000000..b489907 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/NCBI_ID1Access.mod.dtd @@ -0,0 +1,218 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_ID2Access.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_ID2Access.dtd new file mode 100644 index 0000000..4adbfa5 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/NCBI_ID2Access.dtd @@ -0,0 +1,95 @@ + + + + + 
+%NCBI_Entity_module; + + +%EMBL_General_module; + + +%GenBank_General_module; + + +%NCBI_Biblio_module; + + +%NCBI_BioSource_module; + + +%NCBI_Gene_module; + + +%NCBI_General_module; + + +%NCBI_ID2Access_module; + + +%NCBI_Medline_module; + + +%NCBI_Organism_module; + + +%NCBI_Protein_module; + + +%NCBI_Pub_module; + + +%NCBI_RNA_module; + + +%NCBI_Rsite_module; + + +%NCBI_Seq_split_module; + + +%NCBI_SeqTable_module; + + +%NCBI_Seqalign_module; + + +%NCBI_Seqfeat_module; + + +%NCBI_Seqloc_module; + + +%NCBI_Seqres_module; + + +%NCBI_Seqset_module; + + +%NCBI_Sequence_module; + + +%NCBI_TxInit_module; + + +%NCBI_Variation_module; + + +%PDB_General_module; + + +%PIR_General_module; + + +%PRF_General_module; + + +%SP_General_module; diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_ID2Access.mod.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_ID2Access.mod.dtd new file mode 100644 index 0000000..5d5ecf7 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/NCBI_ID2Access.mod.dtd @@ -0,0 +1,759 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_MedArchive.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_MedArchive.dtd new file mode 100644 index 0000000..1082302 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/NCBI_MedArchive.dtd @@ -0,0 +1,35 @@ + + + + + +%NCBI_Entity_module; + + +%NCBI_Biblio_module; + + +%NCBI_General_module; + + +%NCBI_MedArchive_module; + + +%NCBI_Medlars_module; + + +%NCBI_Medline_module; + + +%NCBI_Pub_module; + + +%NCBI_PubMed_module; diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_MedArchive.mod.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_MedArchive.mod.dtd new file mode 100644 index 0000000..b4c2701 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/NCBI_MedArchive.mod.dtd @@ -0,0 +1,271 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_Medlars.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_Medlars.dtd new file mode 100644 index 0000000..6d1410c --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/NCBI_Medlars.dtd @@ -0,0 +1,23 @@ + + + + + +%NCBI_Entity_module; + + +%NCBI_Biblio_module; + + +%NCBI_General_module; + + +%NCBI_Medlars_module; diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_Medlars.mod.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_Medlars.mod.dtd new file mode 100644 index 0000000..cb8d48a --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/NCBI_Medlars.mod.dtd @@ -0,0 +1,58 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_Medline.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_Medline.dtd new file mode 100644 index 0000000..9495345 --- /dev/null +++ 
b/code/lib/Bio/Entrez/DTDs/NCBI_Medline.dtd @@ -0,0 +1,23 @@ + + + + + +%NCBI_Entity_module; + + +%NCBI_Biblio_module; + + +%NCBI_General_module; + + +%NCBI_Medline_module; diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_Medline.mod.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_Medline.mod.dtd new file mode 100644 index 0000000..b05a78e --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/NCBI_Medline.mod.dtd @@ -0,0 +1,245 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_Mim.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_Mim.dtd new file mode 100644 index 0000000..7dc862b --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/NCBI_Mim.dtd @@ -0,0 +1,17 @@ + + + + + +%NCBI_Entity_module; + + +%NCBI_Mim_module; diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_Mim.mod.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_Mim.mod.dtd new file mode 100644 index 0000000..664a851 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/NCBI_Mim.mod.dtd @@ -0,0 +1,354 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_Mime.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_Mime.dtd new file mode 100644 index 0000000..a7be929 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/NCBI_Mime.dtd @@ -0,0 +1,113 @@ + + + + + +%NCBI_Entity_module; + + +%EMBL_General_module; + + +%GenBank_General_module; + + +%MMDB_module; + + +%MMDB_Chemical_graph_module; + + +%MMDB_Features_module; + + +%MMDB_Structural_model_module; + + +%NCBI_Biblio_module; + + +%NCBI_BioSource_module; + + +%NCBI_Cdd_module; + + +%NCBI_Cn3d_module; + + +%NCBI_Gene_module; + + +%NCBI_General_module; + + +%NCBI_Medline_module; + + +%NCBI_Mime_module; + + +%NCBI_Organism_module; + + +%NCBI_Protein_module; + + +%NCBI_Pub_module; + + +%NCBI_RNA_module; + + +%NCBI_Rsite_module; + + +%NCBI_ScoreMat_module; + + +%NCBI_SeqTable_module; + + +%NCBI_Seqalign_module; + + +%NCBI_Seqfeat_module; + + +%NCBI_Seqloc_module; + + +%NCBI_Seqres_module; + + +%NCBI_Seqset_module; + + +%NCBI_Sequence_module; + + +%NCBI_TxInit_module; + + +%NCBI_Variation_module; + + +%PDB_General_module; + + +%PIR_General_module; + + +%PRF_General_module; + + +%SP_General_module; diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_Mime.mod.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_Mime.mod.dtd new file mode 100644 index 0000000..a7f8ef4 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/NCBI_Mime.mod.dtd @@ -0,0 +1,251 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_ObjPrt.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_ObjPrt.dtd new file mode 100644 index 0000000..ae5196a --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/NCBI_ObjPrt.dtd @@ -0,0 +1,17 @@ + + + + + 
+%NCBI_Entity_module; + + +%NCBI_ObjPrt_module; diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_ObjPrt.mod.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_ObjPrt.mod.dtd new file mode 100644 index 0000000..23f916a --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/NCBI_ObjPrt.mod.dtd @@ -0,0 +1,133 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_Organism.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_Organism.dtd new file mode 100644 index 0000000..b06e17e --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/NCBI_Organism.dtd @@ -0,0 +1,20 @@ + + + + + +%NCBI_Entity_module; + + +%NCBI_General_module; + + +%NCBI_Organism_module; diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_Organism.mod.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_Organism.mod.dtd new file mode 100644 index 0000000..9c36c43 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/NCBI_Organism.mod.dtd @@ -0,0 +1,226 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_PCAssay.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_PCAssay.dtd new file mode 100644 index 0000000..7b35bb2 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/NCBI_PCAssay.dtd @@ -0,0 +1,38 @@ + + + + + +%NCBI_Entity_module; + + +%NCBI_Biblio_module; + + +%NCBI_BioSource_module; + + +%NCBI_General_module; + + +%NCBI_Medline_module; + + +%NCBI_Organism_module; + + +%NCBI_PCAssay_module; + + +%NCBI_PCSubstance_module; + + +%NCBI_Pub_module; diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_PCAssay.mod.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_PCAssay.mod.dtd new file mode 100644 index 0000000..020ab07 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/NCBI_PCAssay.mod.dtd @@ -0,0 +1,1006 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_PCSubstance.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_PCSubstance.dtd new file mode 100644 index 0000000..0efe6fd --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/NCBI_PCSubstance.dtd @@ -0,0 +1,29 @@ + + + + + +%NCBI_Entity_module; + + +%NCBI_Biblio_module; + + +%NCBI_General_module; + + +%NCBI_Medline_module; + + +%NCBI_PCSubstance_module; + + +%NCBI_Pub_module; diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_PCSubstance.mod.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_PCSubstance.mod.dtd new file mode 100644 index 0000000..479ed86 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/NCBI_PCSubstance.mod.dtd @@ -0,0 +1,1628 @@ + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_Project.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_Project.dtd new file mode 100644 index 0000000..4d42013 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/NCBI_Project.dtd @@ -0,0 +1,95 @@ + + + + + +%NCBI_Entity_module; + + +%EMBL_General_module; + + +%GenBank_General_module; + + +%NCBI_Biblio_module; + + +%NCBI_BioSource_module; + + +%NCBI_Gene_module; + + +%NCBI_General_module; + + +%NCBI_Medline_module; + + +%NCBI_Organism_module; + + +%NCBI_Project_module; + + +%NCBI_Protein_module; + + +%NCBI_Pub_module; + + +%NCBI_PubMed_module; + + +%NCBI_RNA_module; + + +%NCBI_Rsite_module; + + +%NCBI_SeqTable_module; + + +%NCBI_Seqalign_module; + + +%NCBI_Seqfeat_module; + + +%NCBI_Seqloc_module; + + +%NCBI_Seqres_module; + + +%NCBI_Seqset_module; + + +%NCBI_Sequence_module; + + +%NCBI_TxInit_module; + + +%NCBI_Variation_module; + + +%PDB_General_module; + + +%PIR_General_module; + + +%PRF_General_module; + + +%SP_General_module; diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_Project.mod.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_Project.mod.dtd new file mode 100644 index 0000000..e2215fe --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/NCBI_Project.mod.dtd @@ -0,0 +1,158 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_Protein.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_Protein.dtd new file mode 100644 index 0000000..e8279ea --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/NCBI_Protein.dtd @@ -0,0 +1,20 @@ + + + + + +%NCBI_Entity_module; + + +%NCBI_General_module; + + +%NCBI_Protein_module; diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_Protein.mod.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_Protein.mod.dtd new file mode 100644 index 0000000..e833a5d --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/NCBI_Protein.mod.dtd @@ -0,0 +1,75 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git 
a/code/lib/Bio/Entrez/DTDs/NCBI_Pub.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_Pub.dtd new file mode 100644 index 0000000..6a52954 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/NCBI_Pub.dtd @@ -0,0 +1,26 @@ + + + + + +%NCBI_Entity_module; + + +%NCBI_Biblio_module; + + +%NCBI_General_module; + + +%NCBI_Medline_module; + + +%NCBI_Pub_module; diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_Pub.mod.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_Pub.mod.dtd new file mode 100644 index 0000000..ca92c18 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/NCBI_Pub.mod.dtd @@ -0,0 +1,120 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_PubMed.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_PubMed.dtd new file mode 100644 index 0000000..b272da6 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/NCBI_PubMed.dtd @@ -0,0 +1,26 @@ + + + + + +%NCBI_Entity_module; + + +%NCBI_Biblio_module; + + +%NCBI_General_module; + + +%NCBI_Medline_module; + + +%NCBI_PubMed_module; diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_PubMed.mod.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_PubMed.mod.dtd new file mode 100644 index 0000000..4313a76 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/NCBI_PubMed.mod.dtd @@ -0,0 +1,64 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_RNA.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_RNA.dtd new file mode 100644 index 0000000..c64fad9 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/NCBI_RNA.dtd @@ -0,0 +1,86 @@ + + + + + +%NCBI_Entity_module; + + +%EMBL_General_module; + + +%GenBank_General_module; + + +%NCBI_Biblio_module; + + +%NCBI_BioSource_module; + + +%NCBI_Gene_module; + + +%NCBI_General_module; + + +%NCBI_Medline_module; + + +%NCBI_Organism_module; + + +%NCBI_Protein_module; + + +%NCBI_Pub_module; + + +%NCBI_RNA_module; + + +%NCBI_Rsite_module; + + +%NCBI_SeqTable_module; + + +%NCBI_Seqalign_module; + + +%NCBI_Seqfeat_module; + + +%NCBI_Seqloc_module; + + +%NCBI_Seqres_module; + + +%NCBI_Sequence_module; + + +%NCBI_TxInit_module; + + +%NCBI_Variation_module; + + +%PDB_General_module; + + +%PIR_General_module; + + +%PRF_General_module; + + +%SP_General_module; diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_RNA.mod.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_RNA.mod.dtd new file mode 100644 index 0000000..b1c7991 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/NCBI_RNA.mod.dtd @@ -0,0 +1,144 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/NCBI_Remap.dtd b/code/lib/Bio/Entrez/DTDs/NCBI_Remap.dtd new file mode 100644 index 0000000..4696a50 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/NCBI_Remap.dtd @@ -0,0 +1,89 @@ + + + + + +%NCBI_Entity_module; + + +%EMBL_General_module; + + +%GenBank_General_module; + + +%NCBI_Biblio_module; + + +%NCBI_BioSource_module; + + +%NCBI_Gene_module; + + +%NCBI_General_module; + + +%NCBI_Medline_module; + + +%NCBI_Organism_module; + + +%NCBI_Protein_module; + + +%NCBI_Pub_module; + + +%NCBI_RNA_module; + + +%NCBI_Remap_module; + + +%NCBI_Rsite_module; + + +%NCBI_SeqTable_module; + + +%NCBI_Seqalign_module; + + +%NCBI_Seqfeat_module; + + +%NCBI_Seqloc_module; + + +%NCBI_Seqres_module; + + +%NCBI_Sequence_module; + + +%NCBI_TxInit_module; + + +%NCBI_Variation_module; + + +%PDB_General_module; + + +%PIR_General_module; + + +%PRF_General_module; + + +%SP_General_module; diff --git 
 NCBI_Remap.mod.dtd (158), NCBI_Rsite.dtd (20), NCBI_Rsite.mod.dtd (38), NCBI_ScoreMat.dtd (92),
 NCBI_ScoreMat.mod.dtd (579), NCBI_SeqCode.dtd (17), NCBI_SeqCode.mod.dtd (150),
 NCBI_SeqTable.dtd (86), NCBI_SeqTable.mod.dtd (390), NCBI_Seq_split.dtd (92),
 NCBI_Seq_split.mod.dtd (559), NCBI_Seqalign.dtd (86), NCBI_Seqalign.mod.dtd (570),
 NCBI_Seqfeat.dtd (86), NCBI_Seqfeat.mod.dtd (772), NCBI_Seqloc.dtd (86),
 NCBI_Seqloc.mod.dtd (325), NCBI_Seqres.dtd (86), NCBI_Seqres.mod.dtd (134),
 NCBI_Seqset.dtd (89), NCBI_Seqset.mod.dtd (138), NCBI_Sequence.dtd (86),
 NCBI_Sequence.mod.dtd (1112), NCBI_Submit.dtd (92), NCBI_Submit.mod.dtd (156),
 NCBI_Systems.dtd (86), NCBI_TSeq.dtd (17), NCBI_TSeq.mod.dtd (66), NCBI_TxInit.dtd (29),
 NCBI_TxInit.mod.dtd (184), NCBI_Variation.dtd (86), NCBI_Variation.mod.dtd (944),
 NCBI_all.dtd (202; the master DTD, which keeps its full run of %*_module; references covering
 every module in the set), NSE.dtd (17), NSE.mod.dtd (895), OMSSA.dtd (89), OMSSA.mod.dtd (1361),
 PDB_General.dtd (20), PDB_General.mod.dtd (70), PIR_General.dtd (86), PIR_General.mod.dtd (78),
 PRF_General.dtd (17), PRF_General.mod.dtd (56), SP_General.dtd (86),
b/code/lib/Bio/Entrez/DTDs/SP_General.mod.dtd @@ -0,0 +1,94 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/XHTMLtablesetup.ent b/code/lib/Bio/Entrez/DTDs/XHTMLtablesetup.ent new file mode 100644 index 0000000..1ebe3ce --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/XHTMLtablesetup.ent @@ -0,0 +1,309 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +%htmltable.dtd; + + + diff --git a/code/lib/Bio/Entrez/DTDs/archivearticle.dtd b/code/lib/Bio/Entrez/DTDs/archivearticle.dtd new file mode 100644 index 0000000..0b81d6a --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/archivearticle.dtd @@ -0,0 +1,952 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +%archivecustom-modules.ent; + + + + + +%modules.ent; + + + + + + + + + + + + + +%archivecustom-classes.ent; + + + + +%default-classes.ent; + + + + +%archivecustom-mixes.ent; + + + + +%default-mixes.ent; + + + + +%archivecustom-models.ent; + + + + + + + + + + +%common.ent; + + + + + + + +%articlemeta.ent; + + + +%backmatter.ent; + + + +%display.ent; + + + + +%format.ent; + + + +%journalmeta.ent; + + + +%link.ent; + + + +%list.ent; + + + +%math.ent; + + + +%para.ent; + + + +%phrase.ent; + + + +%references.ent; + + + +%section.ent; + + + + + + + + + +%mathmlsetup.ent; + + + + + +%XHTMLtablesetup.ent; + + + + +%xmlspecchars.ent; + + + + +%chars.ent; + + + +%notat.ent; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/archivecustom-classes.ent b/code/lib/Bio/Entrez/DTDs/archivecustom-classes.ent new file mode 100644 index 0000000..3d665ad --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/archivecustom-classes.ent @@ -0,0 +1,157 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/archivecustom-mixes.ent b/code/lib/Bio/Entrez/DTDs/archivecustom-mixes.ent new file mode 100644 index 0000000..a5e1b05 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/archivecustom-mixes.ent @@ -0,0 +1,306 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/archivecustom-models.ent b/code/lib/Bio/Entrez/DTDs/archivecustom-models.ent new file mode 100644 index 0000000..eb494c0 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/archivecustom-models.ent @@ -0,0 +1,756 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/archivecustom-modules.ent b/code/lib/Bio/Entrez/DTDs/archivecustom-modules.ent new file mode 100644 index 0000000..36fbc43 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/archivecustom-modules.ent @@ -0,0 +1,116 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/articlemeta.ent b/code/lib/Bio/Entrez/DTDs/articlemeta.ent new file mode 100644 index 0000000..f594afe --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/articlemeta.ent @@ -0,0 +1,1811 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/backmatter.ent b/code/lib/Bio/Entrez/DTDs/backmatter.ent new file mode 100644 index 0000000..1ece324 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/backmatter.ent @@ -0,0 +1,277 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/bookdoc_100301.dtd b/code/lib/Bio/Entrez/DTDs/bookdoc_100301.dtd new file mode 100644 index 0000000..78e2bae --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/bookdoc_100301.dtd @@ -0,0 +1,78 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/bookdoc_110101.dtd b/code/lib/Bio/Entrez/DTDs/bookdoc_110101.dtd new file mode 100644 index 0000000..78e2bae --- /dev/null +++ 
b/code/lib/Bio/Entrez/DTDs/bookdoc_110101.dtd @@ -0,0 +1,78 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/bookdoc_120101.dtd b/code/lib/Bio/Entrez/DTDs/bookdoc_120101.dtd new file mode 100644 index 0000000..78e2bae --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/bookdoc_120101.dtd @@ -0,0 +1,78 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/bookdoc_130101.dtd b/code/lib/Bio/Entrez/DTDs/bookdoc_130101.dtd new file mode 100644 index 0000000..8a4f338 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/bookdoc_130101.dtd @@ -0,0 +1,82 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/bookdoc_140101.dtd b/code/lib/Bio/Entrez/DTDs/bookdoc_140101.dtd new file mode 100644 index 0000000..8a4f338 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/bookdoc_140101.dtd @@ -0,0 +1,82 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/bookdoc_150101.dtd b/code/lib/Bio/Entrez/DTDs/bookdoc_150101.dtd new file mode 100644 index 0000000..8a4f338 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/bookdoc_150101.dtd @@ -0,0 +1,82 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/chars.ent b/code/lib/Bio/Entrez/DTDs/chars.ent new file mode 100644 index 0000000..19b6313 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/chars.ent @@ -0,0 +1,359 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/common.ent b/code/lib/Bio/Entrez/DTDs/common.ent new file mode 100644 index 0000000..c1907d6 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/common.ent @@ -0,0 +1,2790 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/default-classes.ent b/code/lib/Bio/Entrez/DTDs/default-classes.ent new file mode 100644 index 0000000..81d1155 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/default-classes.ent @@ -0,0 +1,704 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/default-mixes.ent b/code/lib/Bio/Entrez/DTDs/default-mixes.ent new file mode 100644 index 0000000..2f28bd9 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/default-mixes.ent @@ -0,0 +1,357 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/display.ent b/code/lib/Bio/Entrez/DTDs/display.ent new file mode 100644 index 0000000..ce81f7f --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/display.ent @@ -0,0 +1,1468 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/code/lib/Bio/Entrez/DTDs/eInfo_020511.dtd b/code/lib/Bio/Entrez/DTDs/eInfo_020511.dtd new file mode 100644 index 0000000..ac4a59f --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/eInfo_020511.dtd @@ -0,0 +1,60 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/eLink_090910.dtd b/code/lib/Bio/Entrez/DTDs/eLink_090910.dtd new file mode 100644 
index 0000000..6aa4c47 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/eLink_090910.dtd @@ -0,0 +1,79 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/eLink_101123.dtd b/code/lib/Bio/Entrez/DTDs/eLink_101123.dtd new file mode 100644 index 0000000..934b5a9 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/eLink_101123.dtd @@ -0,0 +1,88 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/ePost_020511.dtd b/code/lib/Bio/Entrez/DTDs/ePost_020511.dtd new file mode 100644 index 0000000..3da7498 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/ePost_020511.dtd @@ -0,0 +1,14 @@ + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/eSearch_020511.dtd b/code/lib/Bio/Entrez/DTDs/eSearch_020511.dtd new file mode 100644 index 0000000..15e734b --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/eSearch_020511.dtd @@ -0,0 +1,64 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/eSpell.dtd b/code/lib/Bio/Entrez/DTDs/eSpell.dtd new file mode 100644 index 0000000..18b6265 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/eSpell.dtd @@ -0,0 +1,20 @@ + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/eSummary_041029.dtd b/code/lib/Bio/Entrez/DTDs/eSummary_041029.dtd new file mode 100644 index 0000000..a10572a --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/eSummary_041029.dtd @@ -0,0 +1,20 @@ + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/egquery.dtd b/code/lib/Bio/Entrez/DTDs/egquery.dtd new file mode 100644 index 0000000..ff53342 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/egquery.dtd @@ -0,0 +1,22 @@ + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/einfo.dtd b/code/lib/Bio/Entrez/DTDs/einfo.dtd new file mode 100644 index 0000000..f42e108 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/einfo.dtd @@ -0,0 +1,62 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/elink_020122.dtd b/code/lib/Bio/Entrez/DTDs/elink_020122.dtd new file mode 100644 index 0000000..6f93374 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/elink_020122.dtd @@ -0,0 +1,20 @@ + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/esearch.dtd b/code/lib/Bio/Entrez/DTDs/esearch.dtd new file mode 100644 index 0000000..bd11e35 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/esearch.dtd @@ -0,0 +1,103 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/esummary-v1.dtd b/code/lib/Bio/Entrez/DTDs/esummary-v1.dtd new file mode 100644 index 0000000..a10572a --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/esummary-v1.dtd @@ -0,0 +1,20 @@ + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/format.ent b/code/lib/Bio/Entrez/DTDs/format.ent new file mode 100644 index 0000000..b702a9b --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/format.ent @@ -0,0 +1,412 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/htmltable.dtd b/code/lib/Bio/Entrez/DTDs/htmltable.dtd new file mode 100644 index 0000000..f4432ad --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/htmltable.dtd @@ -0,0 +1,334 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +]]> + + + +]]> + + + + + + +]]> + + + +]]> + + + + + + + + +]]> + + + +]]> + + + + + + + + +]]> + + + +]]> + + + + + + + + +]]> + + + +]]> + + + + + + + + +]]> + + + +]]> + + + + + + + + +]]> + + + +]]> + + + + + + +]]> + + + +]]> + + + + + + + + +]]> + + + +]]> + + + + + + +]]> + + + +]]> + + + diff --git a/code/lib/Bio/Entrez/DTDs/isoamsa.ent b/code/lib/Bio/Entrez/DTDs/isoamsa.ent new file mode 100644 index 0000000..c413168 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/isoamsa.ent @@ -0,0 +1,167 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/isoamsb.ent b/code/lib/Bio/Entrez/DTDs/isoamsb.ent new file mode 100644 index 0000000..b74414b --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/isoamsb.ent @@ -0,0 +1,143 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/isoamsc.ent b/code/lib/Bio/Entrez/DTDs/isoamsc.ent new file mode 100644 index 0000000..46ea221 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/isoamsc.ent @@ -0,0 +1,43 @@ + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/isoamsn.ent b/code/lib/Bio/Entrez/DTDs/isoamsn.ent new file mode 100644 index 0000000..a1df8b7 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/isoamsn.ent @@ -0,0 +1,114 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/isoamso.ent b/code/lib/Bio/Entrez/DTDs/isoamso.ent new file mode 100644 index 0000000..f99cf11 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/isoamso.ent @@ -0,0 +1,73 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/isoamsr.ent b/code/lib/Bio/Entrez/DTDs/isoamsr.ent new file mode 100644 index 0000000..2251ef1 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/isoamsr.ent @@ -0,0 +1,204 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/isobox.ent b/code/lib/Bio/Entrez/DTDs/isobox.ent new file mode 100644 index 0000000..05e2b13 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/isobox.ent @@ -0,0 +1,61 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/isocyr1.ent b/code/lib/Bio/Entrez/DTDs/isocyr1.ent 
new file mode 100644 index 0000000..b4149c7 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/isocyr1.ent @@ -0,0 +1,88 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/isocyr2.ent b/code/lib/Bio/Entrez/DTDs/isocyr2.ent new file mode 100644 index 0000000..b038bd9 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/isocyr2.ent @@ -0,0 +1,47 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/isodia.ent b/code/lib/Bio/Entrez/DTDs/isodia.ent new file mode 100644 index 0000000..39ccfcd --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/isodia.ent @@ -0,0 +1,35 @@ + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/isogrk1.ent b/code/lib/Bio/Entrez/DTDs/isogrk1.ent new file mode 100644 index 0000000..a5f52ef --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/isogrk1.ent @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/code/lib/Bio/Entrez/DTDs/isogrk2.ent b/code/lib/Bio/Entrez/DTDs/isogrk2.ent new file mode 100644 index 0000000..d27cc30 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/isogrk2.ent @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/code/lib/Bio/Entrez/DTDs/isogrk3.ent b/code/lib/Bio/Entrez/DTDs/isogrk3.ent new file mode 100644 index 0000000..0cbde88 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/isogrk3.ent @@ -0,0 +1,64 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/isogrk4.ent b/code/lib/Bio/Entrez/DTDs/isogrk4.ent new file mode 100644 index 0000000..07c4d06 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/isogrk4.ent @@ -0,0 +1,69 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/isolat1.ent b/code/lib/Bio/Entrez/DTDs/isolat1.ent new file mode 100644 index 0000000..43ae764 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/isolat1.ent @@ -0,0 +1,83 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/isolat2.ent b/code/lib/Bio/Entrez/DTDs/isolat2.ent new file mode 100644 index 0000000..c29b828 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/isolat2.ent @@ -0,0 +1,142 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/isomfrk.ent b/code/lib/Bio/Entrez/DTDs/isomfrk.ent new file mode 100644 index 0000000..0e1a943 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/isomfrk.ent @@ -0,0 +1,75 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/isomopf.ent b/code/lib/Bio/Entrez/DTDs/isomopf.ent new file mode 100644 index 0000000..4b26425 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/isomopf.ent @@ -0,0 +1,49 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/isomscr.ent b/code/lib/Bio/Entrez/DTDs/isomscr.ent new file mode 100644 index 0000000..a2174f0 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/isomscr.ent @@ -0,0 +1,75 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git 
a/code/lib/Bio/Entrez/DTDs/isonum.ent b/code/lib/Bio/Entrez/DTDs/isonum.ent new file mode 100644 index 0000000..79f4380 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/isonum.ent @@ -0,0 +1,97 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/isopub.ent b/code/lib/Bio/Entrez/DTDs/isopub.ent new file mode 100644 index 0000000..9b27b63 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/isopub.ent @@ -0,0 +1,105 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/isotech.ent b/code/lib/Bio/Entrez/DTDs/isotech.ent new file mode 100644 index 0000000..d94c775 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/isotech.ent @@ -0,0 +1,182 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/journalmeta.ent b/code/lib/Bio/Entrez/DTDs/journalmeta.ent new file mode 100644 index 0000000..c615e2f --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/journalmeta.ent @@ -0,0 +1,341 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/link.ent b/code/lib/Bio/Entrez/DTDs/link.ent new file mode 100644 index 0000000..5481464 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/link.ent @@ -0,0 +1,510 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/list.ent b/code/lib/Bio/Entrez/DTDs/list.ent new file mode 100644 index 0000000..ab18cd9 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/list.ent @@ -0,0 +1,465 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/math.ent b/code/lib/Bio/Entrez/DTDs/math.ent new file mode 100644 index 0000000..1aa543b --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/math.ent @@ -0,0 +1,329 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git 
a/code/lib/Bio/Entrez/DTDs/mathml-in-pubmed.mod b/code/lib/Bio/Entrez/DTDs/mathml-in-pubmed.mod new file mode 100644 index 0000000..ce95673 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/mathml-in-pubmed.mod @@ -0,0 +1,151 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +]]> + + + + + + + + + + + + + + + + + + + + + +%ent-mmlextra; + + + +%ent-mmlalias; + + +%isobox; +%isocyr1; +%isocyr2; +%isodia; +%isolat1; +%isolat2; +%isonum; +%isopub; +%isoamsa; +%isoamsb; +%isoamsc; +%isoamsn; +%isoamso; +%isoamsr; +%isogrk3; +%isomfrk; +%isomopf; +%isomscr; +%isotech; + + + + + + + + + + + + + + +%mathml.dtd; diff --git a/code/lib/Bio/Entrez/DTDs/mathml2-qname-1.mod b/code/lib/Bio/Entrez/DTDs/mathml2-qname-1.mod new file mode 100644 index 0000000..92a7621 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/mathml2-qname-1.mod @@ -0,0 +1 @@ + ]]> ]]> ]]> \ No newline at end of file diff --git a/code/lib/Bio/Entrez/DTDs/mathml2.dtd b/code/lib/Bio/Entrez/DTDs/mathml2.dtd new file mode 100644 index 0000000..ddd60eb --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/mathml2.dtd @@ -0,0 +1,1960 @@ + + + + + + + + + +%mathml-qname.mod;]]> + + + +]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +%ent-mmlextra; + + + + +%ent-mmlalias; + +]]> + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/mathml3-qname1.mod b/code/lib/Bio/Entrez/DTDs/mathml3-qname1.mod new file mode 100644 index 0000000..254bdb2 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/mathml3-qname1.mod @@ -0,0 +1,294 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +]]> + + + + +]]> + + + + +]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/mathml3.dtd b/code/lib/Bio/Entrez/DTDs/mathml3.dtd new file mode 100644 index 0000000..3a8886e --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/mathml3.dtd @@ -0,0 +1,1682 @@ + + + + + + + + + + + + + + +%mathml-qname.mod;]]> + + + +]]> + + + + + + +%isobox; + +%isocyr1; + +%isocyr2; + +%isodia; + +%isolat1; + +%isolat2; + +%isonum; + +%isopub; + +%isoamsa; + +%isoamsb; + +%isoamsc; + +%isoamsn; + +%isoamso; + +%isoamsr; + +%isogrk3; + +%isomfrk; + +%isomopf; + +%isomscr; + +%isotech; + +%mmlextra; + +%mmlalias; + +]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/mathmlsetup.ent b/code/lib/Bio/Entrez/DTDs/mathmlsetup.ent new file mode 100644 index 0000000..76215a5 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/mathmlsetup.ent @@ -0,0 +1,191 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +]]> + + + + + + + + + + + + + +%mathml.dtd; + + + diff --git a/code/lib/Bio/Entrez/DTDs/mmlalias.ent b/code/lib/Bio/Entrez/DTDs/mmlalias.ent new file mode 100644 index 0000000..1371af3 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/mmlalias.ent @@ -0,0 +1,564 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/mmlextra.ent b/code/lib/Bio/Entrez/DTDs/mmlextra.ent new file mode 100644 index 0000000..850c7e7 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/mmlextra.ent @@ -0,0 +1,122 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/modules.ent b/code/lib/Bio/Entrez/DTDs/modules.ent new file mode 100644 index 0000000..5d8b7a6 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/modules.ent @@ -0,0 +1,417 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/nlm-articleset-2.0.dtd b/code/lib/Bio/Entrez/DTDs/nlm-articleset-2.0.dtd new file mode 100644 index 0000000..f82c149 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/nlm-articleset-2.0.dtd @@ -0,0 +1,271 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +%archive-article; + diff --git a/code/lib/Bio/Entrez/DTDs/nlmcatalogrecordset_170601.dtd b/code/lib/Bio/Entrez/DTDs/nlmcatalogrecordset_170601.dtd new file mode 100644 index 0000000..85f6cbe --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/nlmcatalogrecordset_170601.dtd @@ -0,0 +1,280 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/nlmcommon_011101.dtd b/code/lib/Bio/Entrez/DTDs/nlmcommon_011101.dtd new file mode 100644 index 0000000..a092651 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/nlmcommon_011101.dtd @@ -0,0 +1,175 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/nlmcommon_080101.dtd b/code/lib/Bio/Entrez/DTDs/nlmcommon_080101.dtd new file mode 100644 index 0000000..ac0ae02 
--- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/nlmcommon_080101.dtd @@ -0,0 +1,201 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/nlmcommon_090101.dtd b/code/lib/Bio/Entrez/DTDs/nlmcommon_090101.dtd new file mode 100644 index 0000000..787129b --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/nlmcommon_090101.dtd @@ -0,0 +1,220 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/code/lib/Bio/Entrez/DTDs/nlmmedline_011101.dtd b/code/lib/Bio/Entrez/DTDs/nlmmedline_011101.dtd new file mode 100644 index 0000000..1c5aa06 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/nlmmedline_011101.dtd @@ -0,0 +1,60 @@ + + + + + + + + + + + + + + + + + + + +%MedlineCitation; + + + + diff --git a/code/lib/Bio/Entrez/DTDs/nlmmedline_080101.dtd b/code/lib/Bio/Entrez/DTDs/nlmmedline_080101.dtd new file mode 100644 index 0000000..1f935d1 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/nlmmedline_080101.dtd @@ -0,0 +1,71 @@ + + + + + + + + + + + + + + + + + + + + + +%MedlineCitation; + + + + diff --git a/code/lib/Bio/Entrez/DTDs/nlmmedline_090101.dtd b/code/lib/Bio/Entrez/DTDs/nlmmedline_090101.dtd new file mode 100644 index 0000000..d903ebd --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/nlmmedline_090101.dtd @@ -0,0 +1,74 @@ + + + + + + + + + + + + + + + + + + + + + + +%MedlineCitation; + + + + diff --git a/code/lib/Bio/Entrez/DTDs/nlmmedlinecitation_011101.dtd b/code/lib/Bio/Entrez/DTDs/nlmmedlinecitation_011101.dtd new file mode 100644 index 0000000..6b8a447 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/nlmmedlinecitation_011101.dtd @@ -0,0 +1,178 @@ + + + + + + + + + + +%NlmCommon; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/nlmmedlinecitation_080101.dtd b/code/lib/Bio/Entrez/DTDs/nlmmedlinecitation_080101.dtd new file mode 100644 index 0000000..670005a --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/nlmmedlinecitation_080101.dtd @@ -0,0 +1,107 @@ + + + + + + + + + + + + + + + + + + +%NlmSharedCatCit; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/nlmmedlinecitation_090101.dtd b/code/lib/Bio/Entrez/DTDs/nlmmedlinecitation_090101.dtd new file mode 100644 index 0000000..1987031 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/nlmmedlinecitation_090101.dtd @@ -0,0 +1,112 @@ + + + + + + + + + + + + + + + + + +%NlmSharedCatCit; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/nlmmedlinecitationset_100101.dtd b/code/lib/Bio/Entrez/DTDs/nlmmedlinecitationset_100101.dtd new file mode 100644 index 0000000..16fc7fa --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/nlmmedlinecitationset_100101.dtd @@ -0,0 +1,194 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/nlmmedlinecitationset_100301.dtd 
b/code/lib/Bio/Entrez/DTDs/nlmmedlinecitationset_100301.dtd new file mode 100644 index 0000000..e6b4c48 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/nlmmedlinecitationset_100301.dtd @@ -0,0 +1,201 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/nlmmedlinecitationset_110101.dtd b/code/lib/Bio/Entrez/DTDs/nlmmedlinecitationset_110101.dtd new file mode 100644 index 0000000..c520c6d --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/nlmmedlinecitationset_110101.dtd @@ -0,0 +1,197 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/nlmmedlinecitationset_120101.dtd b/code/lib/Bio/Entrez/DTDs/nlmmedlinecitationset_120101.dtd new file mode 100644 index 0000000..6489a8c --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/nlmmedlinecitationset_120101.dtd @@ -0,0 +1,188 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/nlmmedlinecitationset_130101.dtd b/code/lib/Bio/Entrez/DTDs/nlmmedlinecitationset_130101.dtd new file mode 100644 index 0000000..cda3746 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/nlmmedlinecitationset_130101.dtd @@ -0,0 +1,191 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/nlmmedlinecitationset_130501.dtd b/code/lib/Bio/Entrez/DTDs/nlmmedlinecitationset_130501.dtd new file mode 100644 index 0000000..9566d38 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/nlmmedlinecitationset_130501.dtd @@ -0,0 +1,191 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/nlmmedlinecitationset_140101.dtd b/code/lib/Bio/Entrez/DTDs/nlmmedlinecitationset_140101.dtd new file mode 100644 index 0000000..d8238d9 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/nlmmedlinecitationset_140101.dtd @@ -0,0 +1,190 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/nlmmedlinecitationset_150101.dtd b/code/lib/Bio/Entrez/DTDs/nlmmedlinecitationset_150101.dtd new file mode 100644 index 0000000..2d90743 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/nlmmedlinecitationset_150101.dtd @@ -0,0 +1,189 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git 
a/code/lib/Bio/Entrez/DTDs/nlmserials_080101.dtd b/code/lib/Bio/Entrez/DTDs/nlmserials_080101.dtd new file mode 100644 index 0000000..32968d4 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/nlmserials_080101.dtd @@ -0,0 +1,134 @@ + + + + + + + + + + + + + + + + + + + + + +%NlmCommon; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/nlmserials_100101.dtd b/code/lib/Bio/Entrez/DTDs/nlmserials_100101.dtd new file mode 100644 index 0000000..d4693b0 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/nlmserials_100101.dtd @@ -0,0 +1,157 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/nlmsharedcatcit_080101.dtd b/code/lib/Bio/Entrez/DTDs/nlmsharedcatcit_080101.dtd new file mode 100644 index 0000000..f7397ea --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/nlmsharedcatcit_080101.dtd @@ -0,0 +1,80 @@ + + + + + + + + + + +%NlmCommon; + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/nlmsharedcatcit_090101.dtd b/code/lib/Bio/Entrez/DTDs/nlmsharedcatcit_090101.dtd new file mode 100644 index 0000000..2a41d3e --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/nlmsharedcatcit_090101.dtd @@ -0,0 +1,80 @@ + + + + + + + + + + +%NlmCommon; + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/notat.ent b/code/lib/Bio/Entrez/DTDs/notat.ent new file mode 100644 index 0000000..6294521 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/notat.ent @@ -0,0 +1,172 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/para.ent b/code/lib/Bio/Entrez/DTDs/para.ent new file mode 100644 index 0000000..9838a43 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/para.ent @@ -0,0 +1,420 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/phrase.ent b/code/lib/Bio/Entrez/DTDs/phrase.ent new file mode 100644 index 0000000..b08987a --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/phrase.ent @@ -0,0 +1,278 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/pmc-1.dtd b/code/lib/Bio/Entrez/DTDs/pmc-1.dtd new file mode 100644 index 0000000..db84036 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/pmc-1.dtd @@ -0,0 +1,900 @@ + + + + + + + + + +%PMCEntities; %ISO8879ent; %ISO9573ent; + + + + + + + + + + + + + + + + + + + + + + + + + + + +%supp_data_dtd; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +%mathmlsetup.ent; + diff --git a/code/lib/Bio/Entrez/DTDs/pubmed_020114.dtd b/code/lib/Bio/Entrez/DTDs/pubmed_020114.dtd new file mode 100644 index 0000000..1538918 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/pubmed_020114.dtd @@ -0,0 +1,61 @@ + + + + + +%Medline; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/pubmed_080101.dtd b/code/lib/Bio/Entrez/DTDs/pubmed_080101.dtd new file mode 100644 index 0000000..11d6184 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/pubmed_080101.dtd @@ -0,0 +1,71 @@ + + + + + +%Medline; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/pubmed_090101.dtd b/code/lib/Bio/Entrez/DTDs/pubmed_090101.dtd new file mode 100644 index 0000000..ea1ea8f --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/pubmed_090101.dtd @@ -0,0 +1,71 @@ + + + + + +%Medline; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/pubmed_100101.dtd b/code/lib/Bio/Entrez/DTDs/pubmed_100101.dtd new file mode 100644 index 0000000..62b71f8 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/pubmed_100101.dtd @@ -0,0 +1,72 @@ + + + + + +%Medline; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/pubmed_100301.dtd b/code/lib/Bio/Entrez/DTDs/pubmed_100301.dtd new file mode 100644 index 0000000..adc5272 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/pubmed_100301.dtd @@ -0,0 +1,79 @@ + + + + + +%Medline; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +%Bookdoc; + diff --git a/code/lib/Bio/Entrez/DTDs/pubmed_110101.dtd b/code/lib/Bio/Entrez/DTDs/pubmed_110101.dtd new file mode 100644 index 0000000..6298eb6 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/pubmed_110101.dtd @@ -0,0 +1,79 @@ + + + + + +%Medline; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +%Bookdoc; + diff --git a/code/lib/Bio/Entrez/DTDs/pubmed_120101.dtd b/code/lib/Bio/Entrez/DTDs/pubmed_120101.dtd new file mode 100644 index 0000000..dacafb8 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/pubmed_120101.dtd @@ -0,0 +1,79 @@ + + + + + +%Medline; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +%Bookdoc; + diff --git a/code/lib/Bio/Entrez/DTDs/pubmed_130101.dtd b/code/lib/Bio/Entrez/DTDs/pubmed_130101.dtd new file mode 100644 index 0000000..82bd9c6 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/pubmed_130101.dtd @@ -0,0 +1,79 @@ + + + + + +%Medline; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +%Bookdoc; + diff --git a/code/lib/Bio/Entrez/DTDs/pubmed_130501.dtd b/code/lib/Bio/Entrez/DTDs/pubmed_130501.dtd new file mode 100644 index 0000000..a3c640c --- /dev/null +++ 
b/code/lib/Bio/Entrez/DTDs/pubmed_130501.dtd @@ -0,0 +1,79 @@ + + + + + +%Medline; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +%Bookdoc; + diff --git a/code/lib/Bio/Entrez/DTDs/pubmed_140101.dtd b/code/lib/Bio/Entrez/DTDs/pubmed_140101.dtd new file mode 100644 index 0000000..4570815 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/pubmed_140101.dtd @@ -0,0 +1,79 @@ + + + + + +%Medline; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +%Bookdoc; + diff --git a/code/lib/Bio/Entrez/DTDs/pubmed_150101.dtd b/code/lib/Bio/Entrez/DTDs/pubmed_150101.dtd new file mode 100644 index 0000000..7c0933d --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/pubmed_150101.dtd @@ -0,0 +1,79 @@ + + + + + +%Medline; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +%Bookdoc; + diff --git a/code/lib/Bio/Entrez/DTDs/pubmed_180101.dtd b/code/lib/Bio/Entrez/DTDs/pubmed_180101.dtd new file mode 100644 index 0000000..48bec8a --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/pubmed_180101.dtd @@ -0,0 +1,434 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/pubmed_180601.dtd b/code/lib/Bio/Entrez/DTDs/pubmed_180601.dtd new file mode 100644 index 0000000..aab61e7 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/pubmed_180601.dtd @@ -0,0 +1,454 @@ + + + + + + + +%mathml-in-pubmed; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/code/lib/Bio/Entrez/DTDs/pubmed_190101.dtd b/code/lib/Bio/Entrez/DTDs/pubmed_190101.dtd new file mode 100644 index 0000000..a1cd167 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/pubmed_190101.dtd @@ -0,0 +1,478 @@ + + + + + + + +%mathml-in-pubmed; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
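These DTD files are bundled so that Bio.Entrez can validate and shape the XML it downloads without having to fetch each DTD from NCBI on every request. A minimal, illustrative sketch of how they come into play (this assumes Biopython is installed, network access to NCBI, and a placeholder contact address):

```python
# Illustrative sketch of a DTD-driven, validating Entrez request.
from Bio import Entrez

Entrez.email = "you@example.com"             # placeholder; NCBI asks for a real address
handle = Entrez.einfo()                      # the XML reply names einfo.dtd at the top
record = Entrez.read(handle, validate=True)  # the DTD decides list vs. dict shapes
handle.close()
print(record["DbList"][:5])                  # database names such as 'pubmed', 'protein'
```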
diff --git a/code/lib/Bio/Entrez/DTDs/references.ent b/code/lib/Bio/Entrez/DTDs/references.ent new file mode 100644 index 0000000..9e63a18 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/references.ent @@ -0,0 +1,726 @@
+ [declarations not captured]
diff --git a/code/lib/Bio/Entrez/DTDs/section.ent b/code/lib/Bio/Entrez/DTDs/section.ent new file mode 100644 index 0000000..1623ac7 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/section.ent @@ -0,0 +1,220 @@
+ [declarations not captured]
diff --git a/code/lib/Bio/Entrez/DTDs/taxon.dtd b/code/lib/Bio/Entrez/DTDs/taxon.dtd new file mode 100644 index 0000000..fadf481 --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/taxon.dtd @@ -0,0 +1,131 @@
+ [declarations not captured]
diff --git a/code/lib/Bio/Entrez/DTDs/xmlspecchars.ent b/code/lib/Bio/Entrez/DTDs/xmlspecchars.ent new file mode 100644 index 0000000..d9914bf --- /dev/null +++ b/code/lib/Bio/Entrez/DTDs/xmlspecchars.ent @@ -0,0 +1,290 @@
+ %ISOlat1; %ISOlat2; %ISObox; %ISOdia; %ISOnum; %ISOpub; %ISOtech; %ISOgrk1; %ISOgrk2; %ISOgrk3; %ISOgrk4; %ISOcyr1; %ISOcyr2; %ISOamsa; %ISOamsb; %ISOamsc; %ISOamsn; %ISOamso; %ISOamsr; %ISOmscr; %ISOmfrk; %ISOmopf; [other declarations not captured]
diff --git a/code/lib/Bio/Entrez/Parser.py b/code/lib/Bio/Entrez/Parser.py new file mode 100644 index 0000000..98ed876 --- /dev/null +++ b/code/lib/Bio/Entrez/Parser.py @@ -0,0 +1,1005 @@
+# Copyright 2008-2014 by Michiel de Hoon. All rights reserved.
+# Revisions copyright 2008-2015 by Peter Cock. All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+
+"""Parser for XML results returned by NCBI's Entrez Utilities.
+
+This parser is used by the read() function in Bio.Entrez, and is not
+intended to be used directly.
+
+The question is how to represent an XML file as Python objects. Some
+XML files returned by NCBI look like lists, others look like dictionaries,
+and others look like a mix of lists and dictionaries.
+
+My approach is to classify each possible element in the XML as a plain
+string, an integer, a list, a dictionary, or a structure. The latter is a
+dictionary where the same key can occur multiple times; in Python, it is
+represented as a dictionary where that key occurs once, pointing to a list
+of values found in the XML file.
+
+The parser then goes through the XML and creates the appropriate Python
+object for each element. The different levels encountered in the XML are
+preserved on the Python side. So a subelement of a subelement of an element
+is a value in a dictionary that is stored in a list which is a value in
+some other dictionary (or a value in a list which itself belongs to a list
+which is a value in a dictionary, and so on). Attributes encountered in
+the XML are stored as a dictionary in a member .attributes of each element,
+and the tag name is saved in a member .tag.
+
+To decide which kind of Python object corresponds to each element in the
+XML, the parser analyzes the DTD referred to at the top of (almost) every
+XML file returned by the Entrez Utilities. This is preferred over a
+hand-written solution, since the number of DTDs is rather large and their
+contents may change over time. About half the code in this parser deals
+with parsing the DTD, and the other half with the XML itself.
+"""
+import os
+import warnings
+from collections import Counter
+from xml.parsers import expat
+from io import BytesIO
+import xml.etree.ElementTree as ET
+from xml.sax.saxutils import escape
+
+from urllib.request import urlopen
+from urllib.parse import urlparse
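+
+# Illustrative sketch (not exercised by this module; values are made up):
+# the element classes defined below let results from Bio.Entrez.read() be
+# navigated like plain Python objects while keeping their XML metadata.
+# For an EInfo reply, for example:
+#
+#     record = Entrez.read(handle)    # a DictionaryElement
+#     record["DbList"]                # a ListElement of database names
+#     record["DbList"][0]             # a StringElement such as 'pubmed'
+#     record["DbList"].tag            # 'DbList'
+#     record["DbList"].attributes     # the XML attributes, as a plain dict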
+
+My approach is to classify each possible element in the XML as a plain
+string, an integer, a list, a dictionary, or a structure. The latter is a
+dictionary where the same key can occur multiple times; in Python, it is
+represented as a dictionary where that key occurs once, pointing to a list
+of values found in the XML file.
+
+The parser then goes through the XML and creates the appropriate Python
+object for each element. The different levels encountered in the XML are
+preserved on the Python side. So a subelement of a subelement of an element
+is a value in a dictionary that is stored in a list which is a value in
+some other dictionary (or a value in a list which itself belongs to a list
+which is a value in a dictionary, and so on). Attributes encountered in
+the XML are stored as a dictionary in a member .attributes of each element,
+and the tag name is saved in a member .tag.
+
+To decide which kind of Python object corresponds to each element in the
+XML, the parser analyzes the DTD referred to at the top of (almost) every
+XML file returned by the Entrez Utilities. This is preferred over a
+hand-written solution, since the number of DTDs is rather large and their
+contents may change over time. About half the code in this parser deals
+with parsing the DTD, and the other half with the XML itself.
+"""
+import os
+import warnings
+from collections import Counter
+from xml.parsers import expat
+from io import BytesIO
+import xml.etree.ElementTree as ET
+from xml.sax.saxutils import escape
+
+from urllib.request import urlopen, urlparse
+
+
+# The following four classes are used to add a member .attributes to integers,
+# strings, lists, and dictionaries, respectively.
+
+
+class NoneElement:
+    """NCBI Entrez XML element mapped to None."""
+
+    def __init__(self, tag, attributes, key=None):
+        """Create a NoneElement."""
+        self.tag = tag
+        if key is None:
+            self.key = tag
+        else:
+            self.key = key
+        self.attributes = attributes
+
+    def __eq__(self, other):
+        """Define equality with other None objects."""
+        if other is None:
+            return True
+        elif other.__eq__(None):
+            return True
+        else:
+            return False
+
+    def __ne__(self, other):
+        """Define non-equality."""
+        if other is None:
+            return False
+        elif other.__eq__(None):
+            return False
+        else:
+            return True
+
+    def __repr__(self):
+        """Return a string representation of the object."""
+        try:
+            attributes = self.attributes
+        except AttributeError:
+            return "NoneElement"
+        return "NoneElement(attributes=%r)" % attributes
+
+
+class IntegerElement(int):
+    """NCBI Entrez XML element mapped to an integer."""
+
+    def __new__(cls, value, tag, attributes, key=None):
+        """Create an IntegerElement."""
+        self = int.__new__(cls, value)
+        self.tag = tag
+        if key is None:
+            self.key = tag
+        else:
+            self.key = key
+        self.attributes = attributes
+        return self
+
+    def __repr__(self):
+        """Return a string representation of the object."""
+        text = int.__repr__(self)
+        try:
+            attributes = self.attributes
+        except AttributeError:
+            return text
+        return "IntegerElement(%s, attributes=%r)" % (text, attributes)
+
+
+class StringElement(str):
+    """NCBI Entrez XML element mapped to a string."""
+
+    def __new__(cls, value, tag, attributes, key=None):
+        """Create a StringElement."""
+        self = str.__new__(cls, value)
+        self.tag = tag
+        if key is None:
+            self.key = tag
+        else:
+            self.key = key
+        self.attributes = attributes
+        return self
+
+    def __repr__(self):
+        """Return a string representation of the object."""
+        text = str.__repr__(self)
+        attributes = self.attributes
+        if not attributes:
+            return text
+        return "StringElement(%s, attributes=%r)" % (text, attributes)
+
+
+class ListElement(list):
+    """NCBI Entrez XML element mapped to a list."""
+
+    def __init__(self, tag, attributes, allowed_tags, key=None):
+        """Create a ListElement."""
+        self.tag = tag
+        if key is None:
+            self.key = tag
+        else:
+            self.key = key
+        self.attributes = attributes
+        self.allowed_tags = allowed_tags
+
+    def __repr__(self):
+        """Return a string representation of the object."""
+        text = list.__repr__(self)
+        attributes = self.attributes
+        if not attributes:
+            return text
+        return "ListElement(%s, attributes=%r)" % (text, attributes)
+
+    def store(self, value):
+        """Append an element to the list, checking tags."""
+        key = value.key
+        if self.allowed_tags is not None and key not in self.allowed_tags:
+            raise ValueError("Unexpected item '%s' in list" % key)
+        self.append(value)
+
+
+class DictionaryElement(dict):
+    """NCBI Entrez XML element mapped to a dictionary."""
+
+    def __init__(self, tag, attrs, allowed_tags, repeated_tags=None, key=None):
+        """Create a DictionaryElement."""
+        self.tag = tag
+        if key is None:
+            self.key = tag
+        else:
+            self.key = key
+        self.attributes = attrs
+        self.allowed_tags = allowed_tags
+        self.repeated_tags = repeated_tags
+        if repeated_tags:
+            for key in repeated_tags:
+                self[key] = []
+
+    def __repr__(self):
+        """Return a string representation of the object."""
+        text = dict.__repr__(self)
+        attributes = self.attributes
+        if not attributes:
+            return text
+        return "DictElement(%s, attributes=%r)" % (text, attributes)
+
+    def store(self, value):
+        """Add an entry to the dictionary, checking tags."""
+        key = value.key
+        tag = value.tag
+        if self.allowed_tags is not None and tag not in self.allowed_tags:
+            raise ValueError("Unexpected item '%s' in dictionary" % key)
+        if self.repeated_tags and key in self.repeated_tags:
+            self[key].append(value)
+        else:
+            self[key] = value
+
+
+class NotXMLError(ValueError):
+    """Failed to parse file as XML."""
+
+    def __init__(self, message):
+        """Initialize the class."""
+        self.msg = message
+
+    def __str__(self):
+        """Return a string summary of the exception."""
+        return (
+            "Failed to parse the XML data (%s). Please make sure that the input data "
+            "are in XML format." % self.msg
+        )
+
+
+class CorruptedXMLError(ValueError):
+    """Corrupted XML."""
+
+    def __init__(self, message):
+        """Initialize the class."""
+        self.msg = message
+
+    def __str__(self):
+        """Return a string summary of the exception."""
+        return (
+            "Failed to parse the XML data (%s). Please make sure that the input data "
+            "are not corrupted." % self.msg
+        )
+
+
+class ValidationError(ValueError):
+    """XML tag found which was not defined in the DTD.
+
+    Validating parsers raise this error if the parser finds a tag in the XML
+    that is not defined in the DTD. Non-validating parsers do not raise this
+    error. The Bio.Entrez.read and Bio.Entrez.parse functions use validating
+    parsers by default (see those functions for more information).
+    """
+
+    def __init__(self, name):
+        """Initialize the class."""
+        self.name = name
+
+    def __str__(self):
+        """Return a string summary of the exception."""
+        return (
+            "Failed to find tag '%s' in the DTD. To skip all tags that "
+            "are not represented in the DTD, please call Bio.Entrez.read "
+            "or Bio.Entrez.parse with validate=False."
+            % self.name
+        )
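These derived element classes are what calling code actually receives back from `Bio.Entrez.read`. A short sketch of how the `.tag`/`.attributes` members and the `validate` flag surface in practice (hypothetical usage; network access and the vendored `lib.Bio` package assumed):

```python
# Sketch only: inspect the derived element types returned by Entrez.read.
from lib.Bio import Entrez  # vendored copy shipped in code/lib

Entrez.email = "you@example.org"  # placeholder address

handle = Entrez.efetch(db="pubmed", id="19304878", retmode="xml")
record = Entrez.read(handle, validate=False)  # skip tags missing from the DTD
handle.close()

article = record["PubmedArticle"][0]["MedlineCitation"]["Article"]
title = article["ArticleTitle"]  # a StringElement, i.e. a str subclass
print(isinstance(title, str))    # True
print(title.tag)                 # the XML tag name
print(title.attributes)          # any XML attributes, as a dict
```

With the default `validate=True`, a tag absent from the DTD raises the `ValidationError` defined above instead of being skipped.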
+
+
+class DataHandlerMeta(type):
+    """A metaclass is needed until Python supports @classproperty."""
+
+    def __init__(cls, *args, **kwargs):
+        """Initialize the class."""
+        cls._directory = None
+
+    @property
+    def directory(cls):
+        """Directory for caching XSD and DTD files."""
+        return cls._directory
+
+    @directory.setter
+    def directory(cls, value):
+        """Set a custom directory for the local DTD/XSD directories."""
+        if value is None:
+            import platform
+
+            if platform.system() == "Windows":
+                value = os.path.join(os.getenv("APPDATA"), "biopython")
+            else:  # Unix/Linux/Mac
+                home = os.path.expanduser("~")
+                value = os.path.join(home, ".config", "biopython")
+        cls._directory = value
+        # Create DTD local directory
+        cls.local_dtd_dir = os.path.join(cls._directory, "Bio", "Entrez", "DTDs")
+        os.makedirs(cls.local_dtd_dir, exist_ok=True)
+        # Create XSD local directory
+        cls.local_xsd_dir = os.path.join(cls._directory, "Bio", "Entrez", "XSDs")
+        os.makedirs(cls.local_xsd_dir, exist_ok=True)
+
+
+class DataHandler(metaclass=DataHandlerMeta):
+    """Data handler for parsing NCBI XML from Entrez."""
+
+    from lib.Bio import Entrez
+
+    global_dtd_dir = os.path.join(Entrez.__path__[0], "DTDs")
+    global_xsd_dir = os.path.join(Entrez.__path__[0], "XSDs")
+    local_dtd_dir = ""
+    local_xsd_dir = ""
+
+    del Entrez
+
+    def __init__(self, validate, escape):
+        """Create a DataHandler object."""
+        self.dtd_urls = []
+        self.element = None
+        self.level = 0
+        self.data = []
+        self.attributes = None
+        self.allowed_tags = None
+        self.strings = {}
+        self.lists = {}
+        self.dictionaries = {}
+        self.items = set()
+        self.errors = set()
+        self.validating = validate
+        self.parser = expat.ParserCreate(namespace_separator=" ")
+        self.parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
+        self.parser.XmlDeclHandler = self.xmlDeclHandler
+        self.schema_namespace = None
+        self.namespace_level = Counter()
+        self.namespace_prefix = {}
+        if escape:
+            self.characterDataHandler = self.characterDataHandlerEscape
+        else:
+            self.characterDataHandler = self.characterDataHandlerRaw
+
+    def read(self, handle):
+        """Set up the parser and let it parse the XML results."""
+        # Expat's parser.ParseFile function only accepts binary data;
+        # see also the comment below for Entrez.parse.
+        if handle.read(0) != b"":
+            raise TypeError("file should be opened in binary mode")
+        try:
+            self.parser.ParseFile(handle)
+        except expat.ExpatError as e:
+            if self.parser.StartElementHandler:
+                # We saw the initial <?xml declaration, so we can be sure
+                # that we are parsing XML data. Most likely, the XML file
+                # is corrupted.
+                raise CorruptedXMLError(e) from None
+            else:
+                # We have not seen the initial <?xml declaration, so
+                # probably the input data is not in XML format.
+                raise NotXMLError(e) from None
+        try:
+            return self.record
+        except AttributeError:
+            if self.parser.StartElementHandler:
+                # We saw the initial <?xml declaration and expat did not
+                # report any errors, so self.record should be defined.
+                # If not, this is a bug.
+                raise RuntimeError(
+                    "Failed to parse the XML file correctly, possibly due to a "
+                    "bug in Bio.Entrez. Please contact the Biopython developers "
+                    "for assistance."
+                ) from None
+            else:
+                # We did not see an initial <?xml declaration, so probably
+                # the input data is not in XML format.
+                raise NotXMLError("XML declaration not found") from None
+
+    def parse(self, handle):
+        """Parse the XML in the given file handle, yielding records."""
+        BLOCK = 1024
+        while True:
+            # Read in another block of data from the file.
+            text = handle.read(BLOCK)
+            try:
+                self.parser.Parse(text, False)
+            except expat.ExpatError as e:
+                if self.parser.StartElementHandler:
+                    # We saw the initial <?xml declaration, so we can be
+                    # sure that we are parsing XML data. Most likely, the
+                    # XML file is corrupted.
+                    raise CorruptedXMLError(e) from None
+                else:
+                    # We have not seen the initial <?xml declaration, so
+                    # probably the input data is not in XML format.
+                    raise NotXMLError(e) from None
+            try:
+                records = self.record
+            except AttributeError:
+                if self.parser.StartElementHandler:
+                    # We saw the initial <?xml declaration and expat did
+                    # not report any errors, so self.record should be
+                    # defined. If not, this is a bug.
+                    raise RuntimeError(
+                        "Failed to parse the XML file correctly, possibly due "
+                        "to a bug in Bio.Entrez. Please contact the Biopython "
+                        "developers for assistance."
+                    ) from None
+                else:
+                    # We did not see an initial <?xml declaration, so
+                    # probably the input data is not in XML format.
+                    raise NotXMLError("XML declaration not found") from None
+            if not isinstance(records, list):
+                raise ValueError(
+                    "The XML file does not represent a list. Please use "
+                    "Entrez.read instead of Entrez.parse"
+                )
+            if not text:
+                break
+            while len(records) >= 2:
+                # Then the first record is finished, while the second record
+                # is still a work in progress.
+ record = records.pop(0) + yield record + + # We have reached the end of the XML file + self.parser = None + if self.element is not None: + # No more XML data, but there is still some unfinished business + raise CorruptedXMLError("Premature end of data") + + # Send out the remaining records + yield from records + + def xmlDeclHandler(self, version, encoding, standalone): + """Set XML handlers when an XML declaration is found.""" + self.parser.CharacterDataHandler = self.characterDataHandler + self.parser.ExternalEntityRefHandler = self.externalEntityRefHandler + self.parser.StartNamespaceDeclHandler = self.startNamespaceDeclHandler + self.parser.EndNamespaceDeclHandler = self.endNamespaceDeclHandler + self.parser.StartElementHandler = self.handleMissingDocumentDefinition + + def handleMissingDocumentDefinition(self, tag, attrs): + """Raise an Exception if neither a DTD nor an XML Schema is found.""" + raise ValueError( + "As the XML data contained neither a Document Type Definition (DTD) nor an XML Schema, Bio.Entrez is unable to parse these data. We recommend using a generic XML parser from the Python standard library instead, for example ElementTree." + ) + + def startNamespaceDeclHandler(self, prefix, uri): + """Handle start of an XML namespace declaration.""" + if prefix == "xsi": + # This is an xml schema + self.schema_namespace = uri + self.parser.StartElementHandler = self.schemaHandler + else: + # Note that the DTD for MathML specifies a default attribute + # that declares the namespace for each MathML element. This means + # that MathML element in the XML has an invisible MathML namespace + # declaration that triggers a call to startNamespaceDeclHandler + # and endNamespaceDeclHandler. Therefore we need to count how often + # startNamespaceDeclHandler and endNamespaceDeclHandler were called + # to find out their first and last invocation for each namespace. 
+ if prefix == "mml": + assert uri == "http://www.w3.org/1998/Math/MathML" + elif prefix == "xlink": + assert uri == "http://www.w3.org/1999/xlink" + else: + raise ValueError("Unknown prefix '%s' with uri '%s'" % (prefix, uri)) + self.namespace_level[prefix] += 1 + self.namespace_prefix[uri] = prefix + + def endNamespaceDeclHandler(self, prefix): + """Handle end of an XML namespace declaration.""" + if prefix != "xsi": + self.namespace_level[prefix] -= 1 + if self.namespace_level[prefix] == 0: + for key, value in self.namespace_prefix.items(): + if value == prefix: + break + else: + raise RuntimeError("Failed to find namespace prefix") + del self.namespace_prefix[key] + + def schemaHandler(self, name, attrs): + """Process the XML schema (before processing the element).""" + key = "%s noNamespaceSchemaLocation" % self.schema_namespace + schema = attrs[key] + handle = self.open_xsd_file(os.path.basename(schema)) + # if there is no local xsd file grab the url and parse the file + if not handle: + handle = urlopen(schema) + text = handle.read() + self.save_xsd_file(os.path.basename(schema), text) + handle.close() + self.parse_xsd(ET.fromstring(text)) + else: + self.parse_xsd(ET.fromstring(handle.read())) + handle.close() + # continue handling the element + self.startElementHandler(name, attrs) + # reset the element handler + self.parser.StartElementHandler = self.startElementHandler + + def startElementHandler(self, tag, attrs): + """Handle start of an XML element.""" + if tag in self.items: + assert tag == "Item" + name = attrs["Name"] + itemtype = attrs["Type"] + del attrs["Type"] + if itemtype == "Structure": + del attrs["Name"] + element = DictionaryElement( + name, attrs, allowed_tags=None, repeated_tags=None + ) + parent = self.element + element.parent = parent + # For consistency with lists below, store the element here + if parent is None: + self.record = element + else: + parent.store(element) + self.element = element + self.parser.EndElementHandler = self.endElementHandler + self.parser.CharacterDataHandler = self.skipCharacterDataHandler + elif name in ("ArticleIds", "History"): + del attrs["Name"] + allowed_tags = None # allowed tags are unknown + repeated_tags = frozenset(["pubmed", "medline"]) + element = DictionaryElement( + tag, + attrs, + allowed_tags=allowed_tags, + repeated_tags=repeated_tags, + key=name, + ) + parent = self.element + element.parent = parent + # For consistency with lists below, store the element here + if parent is None: + self.record = element + else: + parent.store(element) + self.element = element + self.parser.EndElementHandler = self.endElementHandler + self.parser.CharacterDataHandler = self.skipCharacterDataHandler + elif itemtype == "List": + del attrs["Name"] + allowed_tags = None # allowed tags are unknown + element = ListElement(tag, attrs, allowed_tags, name) + parent = self.element + element.parent = parent + if self.element is None: + # Set self.record here to let Entrez.parse iterate over it + self.record = element + else: + parent.store(element) + self.element = element + self.parser.EndElementHandler = self.endElementHandler + self.parser.CharacterDataHandler = self.skipCharacterDataHandler + elif itemtype == "Integer": + self.parser.EndElementHandler = self.endIntegerElementHandler + self.parser.CharacterDataHandler = self.characterDataHandler + self.attributes = attrs + elif itemtype in ("String", "Unknown", "Date", "Enumerator"): + assert self.attributes is None + self.attributes = attrs + self.parser.StartElementHandler = 
self.startRawElementHandler + self.parser.EndElementHandler = self.endStringElementHandler + self.parser.CharacterDataHandler = self.characterDataHandler + else: + raise ValueError("Unknown item type %s" % name) + elif tag in self.errors: + self.parser.EndElementHandler = self.endErrorElementHandler + self.parser.CharacterDataHandler = self.characterDataHandler + elif tag in self.strings: + self.parser.StartElementHandler = self.startRawElementHandler + self.parser.EndElementHandler = self.endStringElementHandler + self.parser.CharacterDataHandler = self.characterDataHandler + assert self.allowed_tags is None + self.allowed_tags = self.strings[tag] + assert self.attributes is None + self.attributes = attrs + elif tag in self.dictionaries: + allowed_tags, repeated_tags = self.dictionaries[tag] + element = DictionaryElement(tag, attrs, allowed_tags, repeated_tags) + parent = self.element + element.parent = parent + # For consistency with lists below, store the element here + if parent is None: + self.record = element + else: + parent.store(element) + self.element = element + self.parser.EndElementHandler = self.endElementHandler + self.parser.CharacterDataHandler = self.skipCharacterDataHandler + elif tag in self.lists: + allowed_tags = self.lists[tag] + element = ListElement(tag, attrs, allowed_tags) + parent = self.element + element.parent = parent + if parent is None: + # Set self.record here to let Entrez.parse iterate over it + self.record = element + else: + parent.store(element) + self.element = element + self.parser.EndElementHandler = self.endElementHandler + self.parser.CharacterDataHandler = self.skipCharacterDataHandler + else: + # Element not found in DTD + if self.validating: + raise ValidationError(tag) + else: + # this will not be stored in the record + self.parser.StartElementHandler = self.startSkipElementHandler + self.parser.EndElementHandler = self.endSkipElementHandler + self.parser.CharacterDataHandler = self.skipCharacterDataHandler + self.level = 1 + + def startRawElementHandler(self, name, attrs): + """Handle start of an XML raw element.""" + # check if the name is in a namespace + prefix = None + if self.namespace_prefix: + try: + uri, name = name.split() + except ValueError: + pass + else: + prefix = self.namespace_prefix[uri] + if self.namespace_level[prefix] == 1: + attrs = {"xmlns": uri} + if prefix: + key = "%s:%s" % (prefix, name) + else: + key = name + # self.allowed_tags is ignored for now. Anyway we know what to do + # with this tag. 
+        tag = "<%s" % name
+        for key, value in attrs.items():
+            tag += ' %s="%s"' % (key, value)
+        tag += ">"
+        self.data.append(tag)
+        self.parser.EndElementHandler = self.endRawElementHandler
+        self.level += 1
+
+    def startSkipElementHandler(self, name, attrs):
+        """Handle start of an XML skip element."""
+        self.level += 1
+
+    def endStringElementHandler(self, tag):
+        """Handle end of an XML string element."""
+        element = self.element
+        if element is not None:
+            self.parser.StartElementHandler = self.startElementHandler
+            self.parser.EndElementHandler = self.endElementHandler
+            self.parser.CharacterDataHandler = self.skipCharacterDataHandler
+        value = "".join(self.data)
+        self.data = []
+        attributes = self.attributes
+        self.attributes = None
+        if tag in self.items:
+            assert tag == "Item"
+            key = attributes["Name"]
+            del attributes["Name"]
+        else:
+            key = tag
+        value = StringElement(value, tag, attributes, key)
+        if element is None:
+            self.record = element
+        else:
+            element.store(value)
+        self.allowed_tags = None
+
+    def endRawElementHandler(self, name):
+        """Handle end of an XML raw element."""
+        self.level -= 1
+        if self.level == 0:
+            self.parser.EndElementHandler = self.endStringElementHandler
+        if self.namespace_prefix:
+            try:
+                uri, name = name.split()
+            except ValueError:
+                pass
+        tag = "</%s>" % name
+        self.data.append(tag)
+
+    def endSkipElementHandler(self, name):
+        """Handle end of an XML skip element."""
+        self.level -= 1
+        if self.level == 0:
+            self.parser.StartElementHandler = self.startElementHandler
+            self.parser.EndElementHandler = self.endElementHandler
+
+    def endErrorElementHandler(self, name):
+        """Handle end of an XML error element."""
+        if self.data:
+            # error found:
+            value = "".join(self.data)
+            raise RuntimeError(value)
+        # no error found:
+        if self.element is not None:
+            self.parser.EndElementHandler = self.endElementHandler
+            self.parser.CharacterDataHandler = self.skipCharacterDataHandler
+
+    def endElementHandler(self, name):
+        """Handle end of an XML element."""
+        element = self.element
+        self.element = element.parent
+        del element.parent
+
+    def endIntegerElementHandler(self, tag):
+        """Handle end of an XML integer element."""
+        attributes = self.attributes
+        self.attributes = None
+        assert tag == "Item"
+        key = attributes["Name"]
+        del attributes["Name"]
+        if self.data:
+            value = int("".join(self.data))
+            self.data = []
+            value = IntegerElement(value, tag, attributes, key)
+        else:
+            value = NoneElement(tag, attributes, key)
+        element = self.element
+        if element is None:
+            self.record = value
+        else:
+            self.parser.EndElementHandler = self.endElementHandler
+            self.parser.CharacterDataHandler = self.skipCharacterDataHandler
+            if value is None:
+                return
+            element.store(value)
+
+    def characterDataHandlerRaw(self, content):
+        """Handle character data as-is (raw)."""
+        self.data.append(content)
+
+    def characterDataHandlerEscape(self, content):
+        """Handle character data by encoding it."""
+        content = escape(content)
+        self.data.append(content)
+
+    def skipCharacterDataHandler(self, content):
+        """Handle character data by skipping it."""
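All of the handlers above cooperate by repointing expat's callbacks at one another as the parse proceeds, which is how the parser switches between collecting, escaping, and skipping character data. A stripped-down, self-contained illustration of that handler-swapping pattern (toy XML and toy tag names, none of the Entrez DTD machinery):

```python
# Toy illustration of the expat handler-swapping pattern used above.
from xml.parsers import expat

collected = []

def start(tag, attrs):
    # Turn character-data collection on only inside <Title> elements.
    if tag == "Title":
        parser.CharacterDataHandler = collected.append

def end(tag):
    # Turn it back off when the element closes.
    if tag == "Title":
        parser.CharacterDataHandler = None

parser = expat.ParserCreate()
parser.StartElementHandler = start
parser.EndElementHandler = end
parser.Parse("<Rec><Title>Hello</Title><Skip>ignored</Skip></Rec>", True)
print("".join(collected))  # Hello
```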
+
+    def parse_xsd(self, root):
+        """Parse an XSD file."""
+        prefix = "{http://www.w3.org/2001/XMLSchema}"
+        for element in root:
+            isSimpleContent = False
+            attribute_keys = []
+            keys = []
+            multiple = []
+            assert element.tag == prefix + "element"
+            name = element.attrib["name"]
+            assert len(element) == 1
+            complexType = element[0]
+            assert complexType.tag == prefix + "complexType"
+            for component in complexType:
+                tag = component.tag
+                if tag == prefix + "attribute":
+                    # we could distinguish by type; keeping string for now
+                    attribute_keys.append(component.attrib["name"])
+                elif tag == prefix + "sequence":
+                    maxOccurs = component.attrib.get("maxOccurs", "1")
+                    for key in component:
+                        assert key.tag == prefix + "element"
+                        ref = key.attrib["ref"]
+                        keys.append(ref)
+                        if maxOccurs != "1" or key.attrib.get("maxOccurs", "1") != "1":
+                            multiple.append(ref)
+                elif tag == prefix + "simpleContent":
+                    assert len(component) == 1
+                    extension = component[0]
+                    assert extension.tag == prefix + "extension"
+                    assert extension.attrib["base"] == "xs:string"
+                    for attribute in extension:
+                        assert attribute.tag == prefix + "attribute"
+                        # we could distinguish by type; keeping string for now
+                        attribute_keys.append(attribute.attrib["name"])
+                    isSimpleContent = True
+            allowed_tags = frozenset(keys)
+            if len(keys) == 1 and keys == multiple:
+                assert not isSimpleContent
+                self.lists[name] = allowed_tags
+            elif len(keys) >= 1:
+                assert not isSimpleContent
+                repeated_tags = frozenset(multiple)
+                self.dictionaries[name] = (allowed_tags, repeated_tags)
+            else:
+                self.strings[name] = allowed_tags
+
+    def elementDecl(self, name, model):
+        """Call a call-back function for each element declaration in a DTD.
+
+        This is used for each element declaration in a DTD like::
+
+            <!ELEMENT       name          (...)>
+
+        The purpose of this function is to determine whether this element
+        should be regarded as a string, integer, list, dictionary, structure,
+        or error.
+        """
+        if name.upper() == "ERROR":
+            self.errors.add(name)
+            return
+        if name == "Item" and model == (
+            expat.model.XML_CTYPE_MIXED,
+            expat.model.XML_CQUANT_REP,
+            None,
+            ((expat.model.XML_CTYPE_NAME, expat.model.XML_CQUANT_NONE, "Item", ()),),
+        ):
+            # Special case. As far as I can tell, this only occurs in the
+            # eSummary DTD.
+            self.items.add(name)
+            return
+        # First, remove ignorable parentheses around declarations
+        while (
+            model[0] in (expat.model.XML_CTYPE_SEQ, expat.model.XML_CTYPE_CHOICE)
+            and model[1] in (expat.model.XML_CQUANT_NONE, expat.model.XML_CQUANT_OPT)
+            and len(model[3]) == 1
+        ):
+            model = model[3][0]
+        # PCDATA declarations correspond to strings
+        if model[0] in (expat.model.XML_CTYPE_MIXED, expat.model.XML_CTYPE_EMPTY):
+            if model[1] == expat.model.XML_CQUANT_REP:
+                children = model[3]
+                allowed_tags = frozenset(child[2] for child in children)
+            else:
+                allowed_tags = frozenset()
+            self.strings[name] = allowed_tags
+            return
+        # List-type elements
+        if model[0] in (
+            expat.model.XML_CTYPE_CHOICE,
+            expat.model.XML_CTYPE_SEQ,
+        ) and model[1] in (expat.model.XML_CQUANT_PLUS, expat.model.XML_CQUANT_REP):
+            children = model[3]
+            if model[0] == expat.model.XML_CTYPE_SEQ:
+                assert len(children) == 1
+            allowed_tags = frozenset(child[2] for child in children)
+            self.lists[name] = allowed_tags
+            return
+        # This is the tricky case. Check which keys can occur multiple
+        # times. If only one key is possible, and it can occur multiple
+        # times, then this is a list. If more than one key is possible,
+        # but none of them can occur multiple times, then this is a
+        # dictionary. Otherwise, this is a structure.
+        # In 'single' and 'multiple', we keep track which keys can occur
+        # only once, and which can occur multiple times.
+        single = []
+        multiple = []
+        # The 'count' function is called recursively to make sure all the
+        # children in this model are counted. Error keys are ignored;
+        # they raise an exception in Python.
+ + def count(model): + quantifier, key, children = model[1:] + if key is None: + if quantifier in ( + expat.model.XML_CQUANT_PLUS, + expat.model.XML_CQUANT_REP, + ): + for child in children: + multiple.append(child[2]) + else: + for child in children: + count(child) + elif key.upper() != "ERROR": + if quantifier in ( + expat.model.XML_CQUANT_NONE, + expat.model.XML_CQUANT_OPT, + ): + single.append(key) + elif quantifier in ( + expat.model.XML_CQUANT_PLUS, + expat.model.XML_CQUANT_REP, + ): + multiple.append(key) + + count(model) + if len(single) == 0 and len(multiple) == 1: + allowed_tags = frozenset(multiple) + self.lists[name] = allowed_tags + else: + allowed_tags = frozenset(single + multiple) + repeated_tags = frozenset(multiple) + self.dictionaries[name] = (allowed_tags, repeated_tags) + + def open_dtd_file(self, filename): + """Open specified DTD file.""" + path = os.path.join(DataHandler.local_dtd_dir, filename) + try: + handle = open(path, "rb") + except FileNotFoundError: + pass + else: + return handle + path = os.path.join(DataHandler.global_dtd_dir, filename) + try: + handle = open(path, "rb") + except FileNotFoundError: + pass + else: + return handle + return None + + def open_xsd_file(self, filename): + """Open specified XSD file.""" + path = os.path.join(DataHandler.local_xsd_dir, filename) + try: + handle = open(path, "rb") + except FileNotFoundError: + pass + else: + return handle + path = os.path.join(DataHandler.global_xsd_dir, filename) + try: + handle = open(path, "rb") + except FileNotFoundError: + pass + else: + return handle + return None + + def save_dtd_file(self, filename, text): + """Save DTD file to cache.""" + path = os.path.join(DataHandler.local_dtd_dir, filename) + try: + handle = open(path, "wb") + except OSError: + warnings.warn("Failed to save %s at %s" % (filename, path)) + else: + handle.write(text) + handle.close() + + def save_xsd_file(self, filename, text): + """Save XSD file to cache.""" + path = os.path.join(DataHandler.local_xsd_dir, filename) + try: + handle = open(path, "wb") + except OSError: + warnings.warn("Failed to save %s at %s" % (filename, path)) + else: + handle.write(text) + handle.close() + + def externalEntityRefHandler(self, context, base, systemId, publicId): + """Handle external entity reference in order to cache DTD locally. + + The purpose of this function is to load the DTD locally, instead + of downloading it from the URL specified in the XML. Using the local + DTD results in much faster parsing. If the DTD is not found locally, + we try to download it. If new DTDs become available from NCBI, + putting them in Bio/Entrez/DTDs will allow the parser to see them. + """ + urlinfo = urlparse(systemId) + if urlinfo.scheme in ["http", "https", "ftp"]: + # Then this is an absolute path to the DTD. + url = systemId + elif urlinfo.scheme == "": + # Then this is a relative path to the DTD. + # Look at the parent URL to find the full path. 
+            try:
+                source = self.dtd_urls[-1]
+            except IndexError:
+                # Assume the default URL for DTDs if the top parent
+                # does not contain an absolute path
+                source = "http://www.ncbi.nlm.nih.gov/dtd/"
+            else:
+                source = os.path.dirname(source)
+            # urls always have a forward slash, don't use os.path.join
+            url = source.rstrip("/") + "/" + systemId
+        else:
+            raise ValueError("Unexpected URL scheme %r" % urlinfo.scheme)
+        self.dtd_urls.append(url)
+        # First, try to load the local version of the DTD file
+        location, filename = os.path.split(systemId)
+        handle = self.open_dtd_file(filename)
+        if not handle:
+            # DTD is not available as a local file. Try accessing it through
+            # the internet instead.
+            try:
+                handle = urlopen(url)
+            except OSError:
+                raise RuntimeError(
+                    "Failed to access %s at %s" % (filename, url)
+                ) from None
+            text = handle.read()
+            handle.close()
+            self.save_dtd_file(filename, text)
+            handle = BytesIO(text)
+
+        parser = self.parser.ExternalEntityParserCreate(context)
+        parser.ElementDeclHandler = self.elementDecl
+        parser.ParseFile(handle)
+        handle.close()
+        self.dtd_urls.pop()
+        self.parser.StartElementHandler = self.startElementHandler
+        return 1
diff --git a/code/lib/Bio/Entrez/XSDs/IPGReportSet.xsd b/code/lib/Bio/Entrez/XSDs/IPGReportSet.xsd
new file mode 100644
index 0000000..6194a26
--- /dev/null
+++ b/code/lib/Bio/Entrez/XSDs/IPGReportSet.xsd
@@ -0,0 +1,97 @@
+[... 97 lines of XSD declarations; markup lost in extraction ...]
diff --git a/code/lib/Bio/Entrez/__init__.py b/code/lib/Bio/Entrez/__init__.py
new file mode 100644
index 0000000..03f2805
--- /dev/null
+++ b/code/lib/Bio/Entrez/__init__.py
@@ -0,0 +1,696 @@
+# Copyright 1999-2000 by Jeffrey Chang. All rights reserved.
+# Copyright 2008-2013 by Michiel de Hoon. All rights reserved.
+# Revisions copyright 2011-2016 by Peter Cock. All rights reserved.
+# Revisions copyright 2015 by Eric Rasche. All rights reserved.
+# Revisions copyright 2015 by Carlos Pena. All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+
+"""Provides code to access NCBI over the WWW.
+
+The main Entrez web page is available at:
+http://www.ncbi.nlm.nih.gov/Entrez/
+
+Entrez Programming Utilities web page is available at:
+http://www.ncbi.nlm.nih.gov/books/NBK25501/
+
+This module provides a number of functions like ``efetch`` (short for
+Entrez Fetch) which will return the data as a handle object. This is
+a standard interface used in Python for reading data from a file, or
+in this case a remote network connection, and provides methods like
+``.read()`` or offers iteration over the contents line by line. See
+also "What the heck is a handle?" in the Biopython Tutorial and
+Cookbook:
+http://biopython.org/DIST/docs/tutorial/Tutorial.html
+http://biopython.org/DIST/docs/tutorial/Tutorial.pdf
+
+The handle returned by these functions can be either in text mode or
+in binary mode, depending on the data requested and the results
+returned by NCBI Entrez. Typically, XML data will be in binary mode
+while other data will be in text mode, as required by the downstream
+parser to parse the data.
+
+Unlike a handle to a file on disk from the ``open(filename)`` function,
+which has a ``.name`` attribute giving the filename, the handles from
+``Bio.Entrez`` all have a ``.url`` attribute instead giving the URL
+used to connect to the NCBI Entrez API.
+
+All the functions that send requests to the NCBI Entrez API will
+automatically respect the NCBI rate limit (of 3 requests per second
+without an API key, or 10 requests per second with an API key) and
+will automatically retry when encountering transient failures
+(i.e. connection failures or HTTP 5XX codes). By default, Biopython
+does a maximum of three tries before giving up, and sleeps for 15
+seconds between tries. You can tweak these parameters by setting
+``Bio.Entrez.max_tries`` and ``Bio.Entrez.sleep_between_tries``.
+
+The Entrez module also provides an XML parser which takes a handle
+as input.
+
+Variables:
+
+    - email         Set the Entrez email parameter (default is not set).
+    - tool          Set the Entrez tool parameter (default is ``biopython``).
+    - api_key       Personal API key from NCBI. If not set, only 3 queries per
+                    second are allowed; 10 queries per second are allowed
+                    otherwise, with a valid API key.
+    - max_tries     Configures how many times failed requests will be
+                    automatically retried on error (default is 3).
+    - sleep_between_tries   The delay, in seconds, before retrying a request
+                    on error (default is 15).
+
+Functions:
+
+    - efetch        Retrieves records in the requested format from a list of
+                    one or more primary IDs or from the user's environment.
+    - epost         Posts a file containing a list of primary IDs for future
+                    use in the user's environment to use with subsequent
+                    search strategies.
+    - esearch       Searches and retrieves primary IDs (for use in EFetch,
+                    ELink, and ESummary) and term translations and optionally
+                    retains results for future use in the user's environment.
+    - elink         Checks for the existence of an external or Related
+                    Articles link from a list of one or more primary IDs.
+                    Retrieves primary IDs and relevancy scores for links to
+                    Entrez databases or Related Articles; creates a hyperlink
+                    to the primary LinkOut provider for a specific ID and
+                    database, or lists LinkOut URLs and Attributes for
+                    multiple IDs.
+    - einfo         Provides field index term counts, last update, and
+                    available links for each database.
+    - esummary      Retrieves document summaries from a list of primary IDs
+                    or from the user's environment.
+    - egquery       Provides Entrez database counts in XML for a single
+                    search using Global Query.
+    - espell        Retrieves spelling suggestions.
+    - ecitmatch     Retrieves PubMed IDs (PMIDs) that correspond to a set of
+                    input citation strings.
+
+    - read          Parses the XML results returned by any of the above
+                    functions. Alternatively, the XML data can be read from a
+                    file opened in binary mode. Typical usage is:
+
+                        >>> from Bio import Entrez
+                        >>> Entrez.email = "Your.Name.Here@example.org"
+                        >>> handle = Entrez.einfo()  # or esearch, efetch, ...
+                        >>> record = Entrez.read(handle)
+                        >>> handle.close()
+
+                    where record is now a Python dictionary or list.
+
+    - parse         Parses the XML results returned by those of the above
+                    functions which can return multiple records - such as
+                    efetch, esummary and elink. Typical usage is:
+
+                        >>> handle = Entrez.esummary(db="pubmed", id="19304878,14630660", retmode="xml")
+                        >>> records = Entrez.parse(handle)
+                        >>> for record in records:
+                        ...     # each record is a Python dictionary or list.
+                        ...     print(record['Title'])
+                        Biopython: freely available Python tools for computational molecular biology and bioinformatics.
+                        PDB file parser and structure class implemented in Python.
+                        >>> handle.close()
+
+                    This function is appropriate only if the XML file contains
+                    multiple records, and is particularly useful for large files.
+
+    - _open         Internally used function.
+
+"""
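In this capsule the typical pattern is an `esearch` followed by an `efetch` on the returned IDs. A minimal sketch, with a hypothetical query term, a placeholder email, and network access assumed:

```python
# Sketch only: search PubMed, then fetch the matching abstracts as text.
from lib.Bio import Entrez  # vendored copy shipped in code/lib

Entrez.email = "you@example.org"  # placeholder address

search = Entrez.read(Entrez.esearch(db="pubmed", term="zinc deficiency", retmax=5))
ids = ",".join(search["IdList"])

handle = Entrez.efetch(db="pubmed", id=ids, rettype="abstract", retmode="text")
print(handle.read())
handle.close()
```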
+
+import time
+import warnings
+import io
+from urllib.error import URLError, HTTPError
+from urllib.parse import urlencode
+from urllib.request import urlopen
+
+
+email = None
+max_tries = 3
+sleep_between_tries = 15
+tool = "biopython"
+api_key = None
+
+
+# XXX retmode?
+def epost(db, **keywds):
+    """Post a file of identifiers for future use.
+
+    Posts a file containing a list of UIs for future use in the user's
+    environment to use with subsequent search strategies.
+
+    See the online documentation for an explanation of the parameters:
+    http://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.EPost
+
+    Return a handle to the results.
+
+    Raises an IOError exception if there's a network error.
+    """
+    cgi = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/epost.fcgi"
+    variables = {"db": db}
+    variables.update(keywds)
+    return _open(cgi, variables, post=True)
+
+
+def efetch(db, **keywords):
+    """Fetch Entrez results which are returned as a handle.
+
+    EFetch retrieves records in the requested format from a list or set of
+    one or more UIs or from the user's environment.
+
+    See the online documentation for an explanation of the parameters:
+    http://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.EFetch
+
+    Return a handle to the results.
+
+    Raises an IOError exception if there's a network error.
+
+    Short example:
+
+    >>> from Bio import Entrez
+    >>> Entrez.email = "Your.Name.Here@example.org"
+    >>> handle = Entrez.efetch(db="nucleotide", id="AY851612", rettype="gb", retmode="text")
+    >>> print(handle.readline().strip())
+    LOCUS       AY851612                 892 bp    DNA     linear   PLN 10-APR-2007
+    >>> handle.close()
+
+    This will automatically use an HTTP POST rather than HTTP GET if there
+    are over 200 identifiers as recommended by the NCBI.
+
+    **Warning:** The NCBI changed the default retmode in Feb 2012, so many
+    databases which previously returned text output now give XML.
+    """
+    cgi = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
+    variables = {"db": db}
+    variables.update(keywords)
+    post = False
+    try:
+        ids = variables["id"]
+    except KeyError:
+        pass
+    else:
+        try:
+            # ids is a single integer or a string representing a single integer
+            ids = str(int(ids))
+        except TypeError:
+            # ids was not a string; try an iterable:
+            ids = ",".join(map(str, ids))
+        except ValueError:
+            # string with commas or string not representing an integer
+            ids = ",".join(map(str, (id.strip() for id in ids.split(","))))
+
+        variables["id"] = ids
+        if ids.count(",") >= 200:
+            # NCBI prefers an HTTP POST instead of an HTTP GET if there are
+            # more than about 200 IDs
+            post = True
+    return _open(cgi, variables, post=post)
+
+
+def esearch(db, term, **keywds):
+    """Run an Entrez search and return a handle to the results.
+
+    ESearch searches and retrieves primary IDs (for use in EFetch, ELink
+    and ESummary) and term translations, and optionally retains results
+    for future use in the user's environment.
+
+    See the online documentation for an explanation of the parameters:
+    http://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.ESearch
+
+    Return a handle to the results which are always in XML format.
+ + Raises an IOError exception if there's a network error. + + Short example: + + >>> from Bio import Entrez + >>> Entrez.email = "Your.Name.Here@example.org" + >>> handle = Entrez.esearch(db="nucleotide", retmax=10, term="opuntia[ORGN] accD", idtype="acc") + >>> record = Entrez.read(handle) + >>> handle.close() + >>> int(record["Count"]) >= 2 + True + >>> "EF590893.1" in record["IdList"] + True + >>> "EF590892.1" in record["IdList"] + True + + """ + cgi = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi" + variables = {"db": db, "term": term} + variables.update(keywds) + return _open(cgi, variables) + + +def elink(**keywds): + """Check for linked external articles and return a handle. + + ELink checks for the existence of an external or Related Articles link + from a list of one or more primary IDs; retrieves IDs and relevancy + scores for links to Entrez databases or Related Articles; creates a + hyperlink to the primary LinkOut provider for a specific ID and + database, or lists LinkOut URLs and attributes for multiple IDs. + + See the online documentation for an explanation of the parameters: + http://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.ELink + + Return a handle to the results, by default in XML format. + + Raises an IOError exception if there's a network error. + + This example finds articles related to the Biopython application + note's entry in the PubMed database: + + >>> from Bio import Entrez + >>> Entrez.email = "Your.Name.Here@example.org" + >>> pmid = "19304878" + >>> handle = Entrez.elink(dbfrom="pubmed", id=pmid, linkname="pubmed_pubmed") + >>> record = Entrez.read(handle) + >>> handle.close() + >>> print(record[0]["LinkSetDb"][0]["LinkName"]) + pubmed_pubmed + >>> linked = [link["Id"] for link in record[0]["LinkSetDb"][0]["Link"]] + >>> "17121776" in linked + True + + This is explained in much more detail in the Biopython Tutorial. + """ + cgi = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi" + variables = {} + variables.update(keywds) + return _open(cgi, variables) + + +def einfo(**keywds): + """Return a summary of the Entrez databases as a results handle. + + EInfo provides field names, index term counts, last update, and + available links for each Entrez database. + + See the online documentation for an explanation of the parameters: + http://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.EInfo + + Return a handle to the results, by default in XML format. + + Raises an IOError exception if there's a network error. + + Short example: + + >>> from Bio import Entrez + >>> Entrez.email = "Your.Name.Here@example.org" + >>> record = Entrez.read(Entrez.einfo()) + >>> 'pubmed' in record['DbList'] + True + + """ + cgi = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/einfo.fcgi" + variables = {} + variables.update(keywds) + return _open(cgi, variables) + + +def esummary(**keywds): + """Retrieve document summaries as a results handle. + + ESummary retrieves document summaries from a list of primary IDs or + from the user's environment. + + See the online documentation for an explanation of the parameters: + http://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.ESummary + + Return a handle to the results, by default in XML format. + + Raises an IOError exception if there's a network error. 
+ + This example discovers more about entry 19923 in the structure + database: + + >>> from Bio import Entrez + >>> Entrez.email = "Your.Name.Here@example.org" + >>> handle = Entrez.esummary(db="structure", id="19923") + >>> record = Entrez.read(handle) + >>> handle.close() + >>> print(record[0]["Id"]) + 19923 + >>> print(record[0]["PdbDescr"]) + Crystal Structure Of E. Coli Aconitase B + + """ + cgi = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi" + variables = {} + variables.update(keywds) + return _open(cgi, variables) + + +def egquery(**keywds): + """Provide Entrez database counts for a global search. + + EGQuery provides Entrez database counts in XML for a single search + using Global Query. + + See the online documentation for an explanation of the parameters: + http://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.EGQuery + + Return a handle to the results in XML format. + + Raises an IOError exception if there's a network error. + + This quick example based on a longer version from the Biopython + Tutorial just checks there are over 60 matches for 'Biopython' + in PubMedCentral: + + >>> from Bio import Entrez + >>> Entrez.email = "Your.Name.Here@example.org" + >>> handle = Entrez.egquery(term="biopython") + >>> record = Entrez.read(handle) + >>> handle.close() + >>> for row in record["eGQueryResult"]: + ... if "pmc" in row["DbName"]: + ... print(int(row["Count"]) > 60) + True + + """ + cgi = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/egquery.fcgi" + variables = {} + variables.update(keywds) + return _open(cgi, variables) + + +def espell(**keywds): + """Retrieve spelling suggestions as a results handle. + + ESpell retrieves spelling suggestions, if available. + + See the online documentation for an explanation of the parameters: + http://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.ESpell + + Return a handle to the results, by default in XML format. + + Raises an IOError exception if there's a network error. + + Short example: + + >>> from Bio import Entrez + >>> Entrez.email = "Your.Name.Here@example.org" + >>> record = Entrez.read(Entrez.espell(term="biopythooon")) + >>> print(record["Query"]) + biopythooon + >>> print(record["CorrectedQuery"]) + biopython + + """ + cgi = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/espell.fcgi" + variables = {} + variables.update(keywds) + return _open(cgi, variables) + + +def _update_ecitmatch_variables(keywds): + # XML is the only supported value, and it actually returns TXT. + variables = {"retmode": "xml"} + citation_keys = ( + "journal_title", + "year", + "volume", + "first_page", + "author_name", + "key", + ) + + # Accept pre-formatted strings + if isinstance(keywds["bdata"], str): + variables.update(keywds) + else: + # Alternatively accept a nicer interface + variables["db"] = keywds["db"] + bdata = [] + for citation in keywds["bdata"]: + formatted_citation = "|".join( + [citation.get(key, "") for key in citation_keys] + ) + bdata.append(formatted_citation) + variables["bdata"] = "\r".join(bdata) + return variables + + +def ecitmatch(**keywds): + """Retrieve PMIDs for input citation strings, returned as a handle. + + ECitMatch retrieves PubMed IDs (PMIDs) that correspond to a set of input + citation strings. + + See the online documentation for an explanation of the parameters: + http://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.ECitMatch + + Return a handle to the results, by default in plain text + + Raises an IOError exception if there's a network error. 
+
+    Short example:
+
+    >>> from Bio import Entrez
+    >>> Entrez.email = "Your.Name.Here@example.org"
+    >>> citation_1 = {"journal_title": "proc natl acad sci u s a",
+    ...               "year": "1991", "volume": "88", "first_page": "3248",
+    ...               "author_name": "mann bj", "key": "citation_1"}
+    >>> handle = Entrez.ecitmatch(db="pubmed", bdata=[citation_1])
+    >>> print(handle.read().strip().split("|"))
+    ['proc natl acad sci u s a', '1991', '88', '3248', 'mann bj', 'citation_1', '2014248']
+    >>> handle.close()
+
+    """
+    cgi = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/ecitmatch.cgi"
+    variables = _update_ecitmatch_variables(keywds)
+    return _open(cgi, variables, ecitmatch=True)
+
+
+def read(handle, validate=True, escape=False):
+    """Parse an XML file from the NCBI Entrez Utilities into python objects.
+
+    This function parses an XML file created by NCBI's Entrez Utilities,
+    returning a multilevel data structure of Python lists and dictionaries.
+    Most XML files returned by NCBI's Entrez Utilities can be parsed by
+    this function, provided its DTD is available. Biopython includes the
+    DTDs for most commonly used Entrez Utilities.
+
+    The handle must be in binary mode. This allows the parser to detect the
+    encoding from the XML file, and to use it to convert all text in the XML
+    to the correct Unicode string. The functions in Bio.Entrez to access NCBI
+    Entrez will automatically return XML data in binary mode. For files,
+    please use mode "rb" when opening the file, as in
+
+    >>> from Bio import Entrez
+    >>> handle = open("Entrez/esearch1.xml", "rb")  # opened in binary mode
+    >>> record = Entrez.read(handle)
+    >>> print(record['QueryTranslation'])
+    biopython[All Fields]
+    >>> handle.close()
+
+    If validate is True (default), the parser will validate the XML file
+    against the DTD, and raise an error if the XML file contains tags that
+    are not represented in the DTD. If validate is False, the parser will
+    simply skip such tags.
+
+    If escape is True, all characters that are not valid HTML are replaced
+    by HTML escape characters to guarantee that the returned strings are
+    valid HTML fragments. For example, a less-than sign (<) is replaced by
+    &lt;. If escape is False (default), the string is returned as is.
+
+    Whereas the data structure seems to consist of generic Python lists,
+    dictionaries, strings, and so on, each of these is actually a class
+    derived from the base type. This allows us to store the attributes
+    (if any) of each element in a dictionary my_element.attributes, and
+    the tag name in my_element.tag.
+    """
+    from .Parser import DataHandler
+
+    handler = DataHandler(validate, escape)
+    record = handler.read(handle)
+    return record
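Where `read` above loads the complete record set into memory, `parse` below yields records one at a time, so large downloads can be processed in roughly constant memory. A sketch of the streaming form on a previously saved file (hypothetical file name):

```python
# Sketch only: stream a large saved PubMed XML file record by record.
from lib.Bio import Entrez  # vendored copy shipped in code/lib

with open("pubmed_result.xml", "rb") as handle:  # hypothetical earlier download
    for record in Entrez.parse(handle):
        citation = record["MedlineCitation"]
        print(citation["PMID"], citation["Article"]["ArticleTitle"])
```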
+
+
+def parse(handle, validate=True, escape=False):
+    """Parse an XML file from the NCBI Entrez Utilities into python objects.
+
+    This function parses an XML file created by NCBI's Entrez Utilities,
+    returning a multilevel data structure of Python lists and dictionaries.
+    This function is suitable for XML files that (in Python) can be
+    represented as a list of individual records. Whereas 'read' reads the
+    complete file and returns a single Python list, 'parse' is a generator
+    function that returns the records one by one. This function is therefore
+    particularly useful for parsing large files.
+
+    Most XML files returned by NCBI's Entrez Utilities can be parsed by
+    this function, provided its DTD is available. Biopython includes the
+    DTDs for most commonly used Entrez Utilities.
+
+    The handle must be in binary mode. This allows the parser to detect the
+    encoding from the XML file, and to use it to convert all text in the XML
+    to the correct Unicode string. The functions in Bio.Entrez to access NCBI
+    Entrez will automatically return XML data in binary mode. For files,
+    please use mode "rb" when opening the file, as in
+
+    >>> from Bio import Entrez
+    >>> handle = open("Entrez/pubmed1.xml", "rb")  # opened in binary mode
+    >>> records = Entrez.parse(handle)
+    >>> for record in records:
+    ...     print(record['MedlineCitation']['Article']['Journal']['Title'])
+    ...
+    Social justice (San Francisco, Calif.)
+    Biochimica et biophysica acta
+    >>> handle.close()
+
+    If validate is True (default), the parser will validate the XML file
+    against the DTD, and raise an error if the XML file contains tags that
+    are not represented in the DTD. If validate is False, the parser will
+    simply skip such tags.
+
+    If escape is True, all characters that are not valid HTML are replaced
+    by HTML escape characters to guarantee that the returned strings are
+    valid HTML fragments. For example, a less-than sign (<) is replaced by
+    &lt;. If escape is False (default), the string is returned as is.
+
+    Whereas the data structure seems to consist of generic Python lists,
+    dictionaries, strings, and so on, each of these is actually a class
+    derived from the base type. This allows us to store the attributes
+    (if any) of each element in a dictionary my_element.attributes, and
+    the tag name in my_element.tag.
+    """
+    from .Parser import DataHandler
+
+    handler = DataHandler(validate, escape)
+    records = handler.parse(handle)
+    return records
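The retry and rate-limit behaviour implemented by `_open` below is controlled entirely through the module-level variables documented at the top of this file; a sketch of tuning them before making any requests (the API key shown is a placeholder):

```python
# Sketch only: tune the client-side rate limiting and retry policy.
from lib.Bio import Entrez  # vendored copy shipped in code/lib

Entrez.email = "you@example.org"    # required by NCBI for all requests
Entrez.api_key = "MY_NCBI_API_KEY"  # placeholder; raises the limit to 10 req/s
Entrez.max_tries = 5                # retry transient failures five times
Entrez.sleep_between_tries = 10     # seconds to wait between retries
```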
+
+
+def _open(cgi, params=None, post=None, ecitmatch=False):
+    """Build the URL and open a handle to it (PRIVATE).
+
+    Open a handle to Entrez. cgi is the URL for the cgi script to access.
+    params is a dictionary with the options to pass to it. Does some
+    simple error checking, and will raise an IOError if it encounters one.
+
+    The argument post should be a boolean to explicitly control if an HTTP
+    POST should be used rather than an HTTP GET based on the query length.
+    By default (post=None), POST is used if the URL encoded parameters would
+    be over 1000 characters long.
+
+    This function also enforces the "up to three queries per second rule"
+    to avoid abusing the NCBI servers.
+    """
+    # NCBI requirement: At most three queries per second if no API key is
+    # provided. Equivalently, at least a third of a second between queries.
+    params = _construct_params(params)
+    options = _encode_options(ecitmatch, params)
+    # Using just 0.333333334 seconds sometimes hit the NCBI rate limit,
+    # the slightly longer pause of 0.37 seconds has been more reliable.
+    delay = 0.1 if api_key else 0.37
+    current = time.time()
+    wait = _open.previous + delay - current
+    if wait > 0:
+        time.sleep(wait)
+        _open.previous = current + wait
+    else:
+        _open.previous = current
+
+    # By default, post is None. Set to a boolean to over-ride length choice:
+    if post is None and len(options) > 1000:
+        post = True
+    cgi = _construct_cgi(cgi, post, options)
+
+    for i in range(max_tries):
+        try:
+            if post:
+                handle = urlopen(cgi, data=options.encode("utf8"))
+            else:
+                handle = urlopen(cgi)
+        except HTTPError as exception:
+            # Reraise if the final try fails
+            if i >= max_tries - 1:
+                raise
+            # Reraise if the exception is triggered by a HTTP 4XX error
+            # indicating some kind of bad request, UNLESS it's specifically a
+            # 429 "Too Many Requests" response. NCBI seems to sometimes
+            # erroneously return 429s even when their rate limit is
+            # honored (and indeed even with the rate-limit-related fudging
+            # higher up in this function in place), so the best we can do is
+            # treat them as a serverside error and try again after sleeping
+            # for a bit.
+            if exception.code // 100 == 4 and exception.code != 429:
+                raise
+        except URLError:
+            # Reraise if the final try fails
+            if i >= max_tries - 1:
+                raise
+            # Treat as a transient error and try again after a brief delay:
+            time.sleep(sleep_between_tries)
+        else:
+            break
+
+    subtype = handle.headers.get_content_subtype()
+    if subtype == "plain":
+        url = handle.url
+        handle = io.TextIOWrapper(handle, encoding="UTF-8")
+        handle.url = url
+    return handle
+
+
+_open.previous = 0
+
+
+def _construct_params(params):
+    if params is None:
+        params = {}
+
+    # Remove None values from the parameters
+    for key, value in list(params.items()):
+        if value is None:
+            del params[key]
+    # Tell Entrez that we are using Biopython (or whatever the user has
+    # specified explicitly in the parameters or by changing the default)
+    if "tool" not in params:
+        params["tool"] = tool
+    # Tell Entrez who we are
+    if "email" not in params:
+        if email is not None:
+            params["email"] = email
+        else:
+            warnings.warn(
+                """
+Email address is not specified.
+
+To make use of NCBI's E-utilities, NCBI requires you to specify your
+email address with each request. As an example, if your email address
+is A.N.Other@example.com, you can specify it as follows:
+   from Bio import Entrez
+   Entrez.email = 'A.N.Other@example.com'
+In case of excessive usage of the E-utilities, NCBI will attempt to contact
+a user at the email address provided before blocking access to the
+E-utilities.""",
+                UserWarning,
+            )
+    if api_key and "api_key" not in params:
+        params["api_key"] = api_key
+    return params
+
+
+def _encode_options(ecitmatch, params):
+    # Open a handle to Entrez.
+    options = urlencode(params, doseq=True)
+    # urlencode encodes pipes, which NCBI expects in ECitMatch
+    if ecitmatch:
+        options = options.replace("%7C", "|")
+    return options
+
+
+def _construct_cgi(cgi, post, options):
+    if not post:
+        # HTTP GET
+        cgi += "?" + options
+    return cgi
+
+
+if __name__ == "__main__":
+    from Bio._utils import run_doctest
+
+    run_doctest()
diff --git a/code/lib/Bio/Entrez/__pycache__/Parser.cpython-311.pyc b/code/lib/Bio/Entrez/__pycache__/Parser.cpython-311.pyc
new file mode 100644
index 0000000..56c87a0
Binary files /dev/null and b/code/lib/Bio/Entrez/__pycache__/Parser.cpython-311.pyc differ
diff --git a/code/lib/Bio/Entrez/__pycache__/Parser.cpython-312.pyc b/code/lib/Bio/Entrez/__pycache__/Parser.cpython-312.pyc
new file mode 100644
index 0000000..877b4ba
Binary files /dev/null and b/code/lib/Bio/Entrez/__pycache__/Parser.cpython-312.pyc differ
diff --git a/code/lib/Bio/Entrez/__pycache__/Parser.cpython-37.pyc b/code/lib/Bio/Entrez/__pycache__/Parser.cpython-37.pyc
new file mode 100644
index 0000000..acf7c7a
Binary files /dev/null and b/code/lib/Bio/Entrez/__pycache__/Parser.cpython-37.pyc differ
diff --git a/code/lib/Bio/Entrez/__pycache__/__init__.cpython-311.pyc b/code/lib/Bio/Entrez/__pycache__/__init__.cpython-311.pyc
new file mode 100644
index 0000000..7fa3a2a
Binary files /dev/null and b/code/lib/Bio/Entrez/__pycache__/__init__.cpython-311.pyc differ
diff --git a/code/lib/Bio/Entrez/__pycache__/__init__.cpython-312.pyc b/code/lib/Bio/Entrez/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000..d217589
Binary files /dev/null and b/code/lib/Bio/Entrez/__pycache__/__init__.cpython-312.pyc differ
diff --git a/code/lib/Bio/Entrez/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/Entrez/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000..a5a9670
Binary files /dev/null and b/code/lib/Bio/Entrez/__pycache__/__init__.cpython-37.pyc differ
diff --git a/code/lib/Bio/ExPASy/Enzyme.py b/code/lib/Bio/ExPASy/Enzyme.py
new file mode 100644
index 0000000..6c1e5ef
--- /dev/null
+++ b/code/lib/Bio/ExPASy/Enzyme.py
@@ -0,0 +1,157 @@
+# Copyright 1999 by Jeffrey Chang. All rights reserved.
+# Copyright 2009 by Michiel de Hoon. All rights reserved.
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+
+"""Parse the enzyme.dat file from Enzyme at ExPASy.
+
+See https://www.expasy.org/enzyme/
+
+Tested with the release of 03-Mar-2009.
+
+Functions:
+    - read       Reads a file containing one ENZYME entry
+    - parse      Reads a file containing multiple ENZYME entries
+
+Classes:
+    - Record     Holds ENZYME data.
+
+"""
+
+
+def parse(handle):
+    """Parse ENZYME records.
+
+    This function is for parsing ENZYME files containing multiple
+    records.
+
+    Arguments:
+     - handle   - handle to the file.
+
+    """
+    while True:
+        record = __read(handle)
+        if not record:
+            break
+        yield record
+
+
+def read(handle):
+    """Read one ENZYME record.
+
+    This function is for parsing ENZYME files containing
+    exactly one record.
+
+    Arguments:
+     - handle   - handle to the file.
+
+    """
+    record = __read(handle)
+    # We should have reached the end of the record by now
+    remainder = handle.read()
+    if remainder:
+        raise ValueError("More than one ENZYME record found")
+    return record
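A sketch of the intended use of `parse` on a local copy of the ExPASy release (file path hypothetical):

```python
# Sketch only: iterate over a locally downloaded enzyme.dat release.
from lib.Bio.ExPASy import Enzyme  # vendored copy shipped in code/lib

with open("enzyme.dat") as handle:  # hypothetical local copy of the release
    for record in Enzyme.parse(handle):
        print(record["ID"], record["DE"])  # EC number and recommended name
```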
+ + Each record contains the following keys: + + - ID: EC number + - DE: Recommended name + - AN: Alternative names (if any) + - CA: Catalytic activity + - CF: Cofactors (if any) + - PR: Pointers to the Prosite documentation entrie(s) that + correspond to the enzyme (if any) + - DR: Pointers to the Swiss-Prot protein sequence entrie(s) + that correspond to the enzyme (if any) + - CC: Comments + + """ + + def __init__(self): + """Initialize the class.""" + dict.__init__(self) + self["ID"] = "" + self["DE"] = "" + self["AN"] = [] + self["CA"] = "" + self["CF"] = "" + self["CC"] = [] # one comment per line + self["PR"] = [] + self["DR"] = [] + + def __repr__(self): + if self["ID"]: + if self["DE"]: + return "%s (%s, %s)" % (self.__class__.__name__, self["ID"], self["DE"]) + else: + return "%s (%s)" % (self.__class__.__name__, self["ID"]) + else: + return "%s ( )" % (self.__class__.__name__) + + def __str__(self): + output = [ + "ID: " + self["ID"], + "DE: " + self["DE"], + "AN: " + repr(self["AN"]), + "CA: '" + self["CA"] + "'", + "CF: " + self["CF"], + "CC: " + repr(self["CC"]), + "PR: " + repr(self["PR"]), + "DR: %d Records" % len(self["DR"]), + ] + return "\n".join(output) + + +# Everything below is private + + +def __read(handle): + record = None + for line in handle: + key, value = line[:2], line[5:].rstrip() + if key == "ID": + record = Record() + record["ID"] = value + elif key == "DE": + record["DE"] += value + elif key == "AN": + if record["AN"] and not record["AN"][-1].endswith("."): + record["AN"][-1] += " " + value + else: + record["AN"].append(value) + elif key == "CA": + record["CA"] += value + elif key == "DR": + pair_data = value.rstrip(";").split(";") + for pair in pair_data: + t1, t2 = pair.split(",") + row = [t1.strip(), t2.strip()] + record["DR"].append(row) + elif key == "CF": + if record["CF"]: + record["CF"] += " " + value + else: + record["CF"] = value + elif key == "PR": + assert value.startswith("PROSITE; ") + value = value[9:].rstrip(";") + record["PR"].append(value) + elif key == "CC": + if value.startswith("-!- "): + record["CC"].append(value[4:]) + elif value.startswith(" ") and record["CC"]: + record["CC"][-1] += value[3:] + # copyright notice is silently skipped + elif key == "//": + if record: + return record + else: # This was the copyright notice + continue + if record: + raise ValueError("Unexpected end of stream") diff --git a/code/lib/Bio/ExPASy/Prodoc.py b/code/lib/Bio/ExPASy/Prodoc.py new file mode 100644 index 0000000..52981a0 --- /dev/null +++ b/code/lib/Bio/ExPASy/Prodoc.py @@ -0,0 +1,173 @@ +# Copyright 2000 by Jeffrey Chang. All rights reserved. +# This code is part of the Biopython distribution and governed by its +# license. Please see the LICENSE file that should have been included +# as part of this package. + +"""Code to work with the prosite.doc file from Prosite. + +See https://www.expasy.org/prosite/ + +Tested with: + - Release 15.0, July 1998 + - Release 16.0, July 1999 + - Release 20.22, 13 November 2007 + - Release 20.43, 10 February 2009 + +Functions: + - read Read a Prodoc file containing exactly one Prodoc entry. + - parse Iterates over entries in a Prodoc file. + +Classes: + - Record Holds Prodoc data. + - Reference Holds data from a Prodoc reference. 
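A minimal usage sketch for the ENZYME parser above, assuming a local copy of enzyme.dat (e.g. downloaded from the ExPASy FTP site):

```python
from Bio.ExPASy import Enzyme

with open("enzyme.dat") as handle:
    for record in Enzyme.parse(handle):
        # Each record behaves like a dict keyed by the two-letter line codes
        print(record["ID"], "-", record["DE"])
```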
+ +""" + + +def read(handle): + """Read in a record from a file with exactly one Prodoc record.""" + record = __read(handle) + # We should have reached the end of the record by now + line = handle.readline() + if line: + raise ValueError("More than one Prodoc record found") + return record + + +def parse(handle): + """Iterate over the records in a Prodoc file.""" + while True: + record = __read(handle) + if not record: + return + yield record + + +class Record: + """Holds information from a Prodoc record. + + Attributes: + - accession Accession number of the record. + - prosite_refs List of tuples (prosite accession, prosite name). + - text Free format text. + - references List of reference objects. + + """ + + def __init__(self): + """Initialize the class.""" + self.accession = "" + self.prosite_refs = [] + self.text = "" + self.references = [] + + +class Reference: + """Holds information from a Prodoc citation. + + Attributes: + - number Number of the reference. (string) + - authors Names of the authors. + - citation Describes the citation. + + """ + + def __init__(self): + """Initialize the class.""" + self.number = "" + self.authors = "" + self.citation = "" + + +# Below are private functions + + +def __read_prosite_reference_line(record, line): + line = line.rstrip() + if line[-1] != "}": + raise ValueError("I don't understand the Prosite reference on line\n%s" % line) + acc, name = line[1:-1].split("; ") + record.prosite_refs.append((acc, name)) + + +def __read_text_line(record, line): + record.text += line + return True + + +def __read_reference_start(record, line): + # Read the references + reference = Reference() + reference.number = line[1:3].strip() + if line[1] == "E": + # If it's an electronic reference, then the URL is on the + # line, instead of the author. 
+ reference.citation = line[4:].strip() + else: + reference.authors = line[4:].strip() + record.references.append(reference) + + +def __read_reference_line(record, line): + if not line.strip(): + return False + reference = record.references[-1] + if line.startswith(" "): + if reference.authors[-1] == ",": + reference.authors += line[4:].rstrip() + else: + reference.citation += line[5:] + return True + raise Exception("I don't understand the reference line\n%s" % line) + + +def __read_copyright_line(record, line): + # Skip the copyright statement + if line.startswith("+----"): + return False + return True + + +def __read(handle): + # Skip blank lines between records + for line in handle: + line = line.rstrip() + if line and not line.startswith("//"): + break + else: + return None + record = Record() + # Read the accession number + if not line.startswith("{PDOC"): + raise ValueError("Line does not start with '{PDOC':\n%s" % line) + if line[-1] != "}": + raise ValueError("I don't understand accession line\n%s" % line) + record.accession = line[1:-1] + # Read the Prosite references + for line in handle: + if line.startswith("{PS"): + __read_prosite_reference_line(record, line) + else: + break + else: + raise ValueError("Unexpected end of stream.") + # Read the actual text + if not line.startswith("{BEGIN"): + raise ValueError("Line does not start with '{BEGIN':\n%s" % line) + read_line = __read_text_line + for line in handle: + if line.startswith("{END}"): + # Clean up the record and return + for reference in record.references: + reference.citation = reference.citation.rstrip() + reference.authors = reference.authors.rstrip() + return record + elif line[0] == "[" and line[3] == "]" and line[4] == " ": + __read_reference_start(record, line) + read_line = __read_reference_line + elif line.startswith("+----"): + read_line = __read_copyright_line + elif read_line: + if not read_line(record, line): + read_line = None + raise ValueError("Unexpected end of stream.") diff --git a/code/lib/Bio/ExPASy/Prosite.py b/code/lib/Bio/ExPASy/Prosite.py new file mode 100644 index 0000000..9174db8 --- /dev/null +++ b/code/lib/Bio/ExPASy/Prosite.py @@ -0,0 +1,314 @@ +# Copyright 1999 by Jeffrey Chang. All rights reserved. +# Copyright 2000 by Jeffrey Chang. All rights reserved. +# Revisions Copyright 2007 by Peter Cock. All rights reserved. +# Revisions Copyright 2009 by Michiel de Hoon. All rights reserved. +# This code is part of the Biopython distribution and governed by its +# license. Please see the LICENSE file that should have been included +# as part of this package. +"""Parser for the prosite dat file from Prosite at ExPASy. + +See https://www.expasy.org/prosite/ + +Tested with: + - Release 20.43, 10-Feb-2009 + - Release 2017_03 of 15-Mar-2017. + +Functions: + - read Reads a Prosite file containing one Prosite record + - parse Iterates over records in a Prosite file. + +Classes: + - Record Holds Prosite data. + +""" + + +def parse(handle): + """Parse Prosite records. + + This function is for parsing Prosite files containing multiple + records. + + Arguments: + - handle - handle to the file. + + """ + while True: + record = __read(handle) + if not record: + break + yield record + + +def read(handle): + """Read one Prosite record. + + This function is for parsing Prosite files containing + exactly one record. + + Arguments: + - handle - handle to the file. 
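Likewise, a usage sketch for the Prodoc parser above, assuming a local prosite.doc file:

```python
from Bio.ExPASy import Prodoc

with open("prosite.doc") as handle:
    for record in Prodoc.parse(handle):
        # record.prosite_refs is a list of (accession, name) tuples
        print(record.accession, len(record.references), "reference(s)")
```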
+ + """ + record = __read(handle) + # We should have reached the end of the record by now + remainder = handle.read() + if remainder: + raise ValueError("More than one Prosite record found") + return record + + +class Record: + """Holds information from a Prosite record. + + Main attributes: + - name ID of the record. e.g. ADH_ZINC + - type Type of entry. e.g. PATTERN, MATRIX, or RULE + - accession e.g. PS00387 + - created Date the entry was created. (MMM-YYYY for releases + before January 2017, DD-MMM-YYYY since January 2017) + - data_update Date the 'primary' data was last updated. + - info_update Date data other than 'primary' data was last updated. + - pdoc ID of the PROSITE DOCumentation. + - description Free-format description. + - pattern The PROSITE pattern. See docs. + - matrix List of strings that describes a matrix entry. + - rules List of rule definitions (from RU lines). (strings) + - prorules List of prorules (from PR lines). (strings) + + NUMERICAL RESULTS: + - nr_sp_release SwissProt release. + - nr_sp_seqs Number of seqs in that release of Swiss-Prot. (int) + - nr_total Number of hits in Swiss-Prot. tuple of (hits, seqs) + - nr_positive True positives. tuple of (hits, seqs) + - nr_unknown Could be positives. tuple of (hits, seqs) + - nr_false_pos False positives. tuple of (hits, seqs) + - nr_false_neg False negatives. (int) + - nr_partial False negatives, because they are fragments. (int) + + COMMENTS: + - cc_taxo_range Taxonomic range. See docs for format + - cc_max_repeat Maximum number of repetitions in a protein + - cc_site Interesting site. list of tuples (pattern pos, desc.) + - cc_skip_flag Can this entry be ignored? + - cc_matrix_type + - cc_scaling_db + - cc_author + - cc_ft_key + - cc_ft_desc + - cc_version version number (introduced in release 19.0) + + The following are all lists if tuples (swiss-prot accession, swiss-prot name). + + DATA BANK REFERENCES: + - dr_positive + - dr_false_neg + - dr_false_pos + - dr_potential Potential hits, but fingerprint region not yet available. + - dr_unknown Could possibly belong + - pdb_structs List of PDB entries. + + """ + + def __init__(self): + """Initialize the class.""" + self.name = "" + self.type = "" + self.accession = "" + self.created = "" + self.data_update = "" + self.info_update = "" + self.pdoc = "" + + self.description = "" + self.pattern = "" + self.matrix = [] + self.rules = [] + self.prorules = [] + self.postprocessing = [] + + self.nr_sp_release = "" + self.nr_sp_seqs = "" + self.nr_total = (None, None) + self.nr_positive = (None, None) + self.nr_unknown = (None, None) + self.nr_false_pos = (None, None) + self.nr_false_neg = None + self.nr_partial = None + + self.cc_taxo_range = "" + self.cc_max_repeat = "" + self.cc_site = [] + self.cc_skip_flag = "" + + self.dr_positive = [] + self.dr_false_neg = [] + self.dr_false_pos = [] + self.dr_potential = [] + self.dr_unknown = [] + + self.pdb_structs = [] + + +# Everything below are private functions + + +def __read(handle): + import re + + record = None + for line in handle: + keyword, value = line[:2], line[5:].rstrip() + if keyword == "ID": + record = Record() + cols = value.split("; ") + if len(cols) != 2: + raise ValueError("I don't understand identification line\n%s" % line) + record.name = cols[0] + record.type = cols[1].rstrip(".") # don't want '.' + elif keyword == "AC": + record.accession = value.rstrip(";") + elif keyword == "DT": + # e.g. from January 2017, + # DT 01-APR-1990 CREATED; 01-APR-1990 DATA UPDATE; 01-APR-1990 INFO UPDATE. 
+ # Older files had brackets round the date descriptions and used MMM-YYYY + dates = value.rstrip(".").split("; ") + if dates[0].endswith((" (CREATED)", " CREATED")): + # Remove last word + record.created = dates[0].rsplit(" ", 1)[0] + else: + raise ValueError("I don't understand date line\n%s" % line) + if dates[1].endswith((" (DATA UPDATE)", " DATA UPDATE")): + # Remove last two words + record.data_update = dates[1].rsplit(" ", 2)[0] + else: + raise ValueError("I don't understand date line\n%s" % line) + if dates[2].endswith((" (INFO UPDATE)", " INFO UPDATE")): + # Remove last two words + record.info_update = dates[2].rsplit(" ", 2)[0] + else: + raise ValueError("I don't understand date line\n%s" % line) + elif keyword == "DE": + record.description = value + elif keyword == "PA": + record.pattern += value + elif keyword == "MA": + record.matrix.append(value) + elif keyword == "PP": + record.postprocessing.extend(value.split(";")) + elif keyword == "RU": + record.rules.append(value) + elif keyword == "NR": + cols = value.split(";") + for col in cols: + if not col: + continue + qual, data = [word.lstrip() for word in col.split("=")] + if qual == "/RELEASE": + release, seqs = data.split(",") + record.nr_sp_release = release + record.nr_sp_seqs = int(seqs) + elif qual == "/FALSE_NEG": + record.nr_false_neg = int(data) + elif qual == "/PARTIAL": + record.nr_partial = int(data) + elif qual in ["/TOTAL", "/POSITIVE", "/UNKNOWN", "/FALSE_POS"]: + m = re.match(r"(\d+)\((\d+)\)", data) + if not m: + raise Exception( + "Broken data %s in comment line\n%r" % (data, line) + ) + hits = tuple(map(int, m.groups())) + if qual == "/TOTAL": + record.nr_total = hits + elif qual == "/POSITIVE": + record.nr_positive = hits + elif qual == "/UNKNOWN": + record.nr_unknown = hits + elif qual == "/FALSE_POS": + record.nr_false_pos = hits + else: + raise ValueError( + "Unknown qual %s in comment line\n%r" % (qual, line) + ) + elif keyword == "CC": + # Expect CC lines like this: + # CC /TAXO-RANGE=??EPV; /MAX-REPEAT=2; + # Can (normally) split on ";" and then on "=" + cols = value.split(";") + for col in cols: + if not col or col[:17] == "Automatic scaling": + # DNAJ_2 in Release 15 has a non-standard comment line: + # CC Automatic scaling using reversed database + # Throw it away. (Should I keep it?) + continue + if col.count("=") == 0: + # Missing qualifier! Can we recover gracefully? 
+ # For example, from Bug 2403, in PS50293 have: + # CC /AUTHOR=K_Hofmann; N_Hulo + continue + qual, data = [word.lstrip() for word in col.split("=")] + if qual == "/TAXO-RANGE": + record.cc_taxo_range = data + elif qual == "/MAX-REPEAT": + record.cc_max_repeat = data + elif qual == "/SITE": + pos, desc = data.split(",") + record.cc_site.append((int(pos), desc)) + elif qual == "/SKIP-FLAG": + record.cc_skip_flag = data + elif qual == "/MATRIX_TYPE": + record.cc_matrix_type = data + elif qual == "/SCALING_DB": + record.cc_scaling_db = data + elif qual == "/AUTHOR": + record.cc_author = data + elif qual == "/FT_KEY": + record.cc_ft_key = data + elif qual == "/FT_DESC": + record.cc_ft_desc = data + elif qual == "/VERSION": + record.cc_version = data + else: + raise ValueError( + "Unknown qual %s in comment line\n%r" % (qual, line) + ) + elif keyword == "DR": + refs = value.split(";") + for ref in refs: + if not ref: + continue + acc, name, type = [word.strip() for word in ref.split(",")] + if type == "T": + record.dr_positive.append((acc, name)) + elif type == "F": + record.dr_false_pos.append((acc, name)) + elif type == "N": + record.dr_false_neg.append((acc, name)) + elif type == "P": + record.dr_potential.append((acc, name)) + elif type == "?": + record.dr_unknown.append((acc, name)) + else: + raise ValueError("I don't understand type flag %s" % type) + elif keyword == "3D": + cols = value.split() + for id in cols: + record.pdb_structs.append(id.rstrip(";")) + elif keyword == "PR": + rules = value.split(";") + record.prorules.extend(rules) + elif keyword == "DO": + record.pdoc = value.rstrip(";") + elif keyword == "//": + if not record: + # Then this was the copyright statement + continue + break + else: + raise ValueError("Unknown keyword %s found" % keyword) + else: + return + if not record: + raise ValueError("Unexpected end of stream.") + return record diff --git a/code/lib/Bio/ExPASy/ScanProsite.py b/code/lib/Bio/ExPASy/ScanProsite.py new file mode 100644 index 0000000..3403703 --- /dev/null +++ b/code/lib/Bio/ExPASy/ScanProsite.py @@ -0,0 +1,145 @@ +# Copyright 2009 by Michiel de Hoon. All rights reserved. +# This code is part of the Biopython distribution and governed by its +# license. Please see the LICENSE file that should have been included +# as part of this package. + +"""Code for calling and parsing ScanProsite from ExPASy.""" + +# Importing these functions with leading underscore as not intended for reuse +from urllib.request import urlopen +from urllib.parse import urlencode + +from xml.sax import handler +from xml.sax.expatreader import ExpatParser + + +class Record(list): + """Represents search results returned by ScanProsite. + + This record is a list containing the search results returned by + ScanProsite. The record also contains the data members n_match, + n_seq, capped, and warning. + """ + + def __init__(self): + """Initialize the class.""" + self.n_match = None + self.n_seq = None + self.capped = None + self.warning = None + + +def scan(seq="", mirror="https://www.expasy.org", output="xml", **keywords): + """Execute a ScanProsite search. + + Arguments: + - mirror: The ScanProsite mirror to be used + (default: https://www.expasy.org). 
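A usage sketch for the Prosite record parser above, assuming a local prosite.dat file:

```python
from Bio.ExPASy import Prosite

with open("prosite.dat") as handle:
    for record in Prosite.parse(handle):
        if record.type == "PATTERN":
            # name, accession and pattern are filled from the ID/AC/PA lines
            print(record.accession, record.name, record.pattern)
```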
+ - seq: The query sequence, or UniProtKB (Swiss-Prot, + TrEMBL) accession + - output: Format of the search results + (default: xml) + + Further search parameters can be passed as keywords; see the + documentation for programmatic access to ScanProsite at + https://www.expasy.org/tools/scanprosite/ScanPrositeREST.html + for a description of such parameters. + + This function returns a handle to the search results returned by + ScanProsite. Search results in the XML format can be parsed into a + Python object, by using the Bio.ExPASy.ScanProsite.read function. + + """ + parameters = {"seq": seq, "output": output} + for key, value in keywords.items(): + if value is not None: + parameters[key] = value + command = urlencode(parameters) + url = "%s/cgi-bin/prosite/PSScan.cgi?%s" % (mirror, command) + handle = urlopen(url) + return handle + + +def read(handle): + """Parse search results returned by ScanProsite into a Python object.""" + content_handler = ContentHandler() + saxparser = Parser() + saxparser.setContentHandler(content_handler) + saxparser.parse(handle) + record = content_handler.record + return record + + +# The classes below are considered private + + +class Parser(ExpatParser): + """Process the result from a ScanProsite search (PRIVATE).""" + + def __init__(self): + """Initialize the class.""" + ExpatParser.__init__(self) + self.firsttime = True + + def feed(self, data, isFinal=0): + """Raise an Error if plain text is received in the data. + + This is to show the Error messages returned by ScanProsite. + """ + # Error messages returned by the ScanProsite server are formatted as + # as plain text instead of an XML document. To catch such error + # messages, we override the feed method of the Expat parser. + # The error message is (hopefully) contained in the data that was just + # fed to the parser. + if self.firsttime: + if data[:5].decode("utf-8") != ">> from Bio import ExPASy + >>> import os + >>> with ExPASy.get_prodoc_entry('PDOC00001') as in_handle: + ... html = in_handle.read() + ... + >>> with open("myprodocrecord.html", "w") as out_handle: + ... length = out_handle.write(html) + ... + >>> os.remove("myprodocrecord.html") # tidy up + + For a non-existing key XXX, ExPASy returns an HTML-formatted page + containing this text: 'There is currently no PROSITE entry for' + """ + return _open("%s?%s" % (cgi, id)) + + +def get_prosite_entry( + id, cgi="https://prosite.expasy.org/cgi-bin/prosite/get-prosite-entry" +): + """Get a text handle to a PROSITE entry at ExPASy in HTML format. + + >>> from Bio import ExPASy + >>> import os + >>> with ExPASy.get_prosite_entry('PS00001') as in_handle: + ... html = in_handle.read() + ... + >>> with open("myprositerecord.html", "w") as out_handle: + ... length = out_handle.write(html) + ... + >>> os.remove("myprositerecord.html") # tidy up + + For a non-existing key XXX, ExPASy returns an HTML-formatted page + containing this text: 'There is currently no PROSITE entry for' + """ + return _open("%s?%s" % (cgi, id)) + + +def get_prosite_raw(id, cgi=None): + """Get a text handle to a raw PROSITE or PRODOC record at ExPASy. + + The cgi argument is deprecated due to changes in the ExPASy + website. + + >>> from Bio import ExPASy + >>> from Bio.ExPASy import Prosite + >>> with ExPASy.get_prosite_raw('PS00001') as handle: + ... record = Prosite.read(handle) + ... 
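The `scan`/`read` pair in the ScanProsite module above can be exercised as follows. This is a live-network sketch against the ExPASy server, and the query sequence is illustrative:

```python
from Bio.ExPASy import ScanProsite

handle = ScanProsite.scan(seq="MEHKEVVLLLLLFLKSGQG")  # illustrative sequence
record = ScanProsite.read(handle)                     # parse the XML results
print(record.n_match, "match(es) in", record.n_seq, "sequence(s)")
for hit in record:
    print(hit)  # each hit is a dict of fields from the ScanProsite XML
```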
+ >>> print(record.accession) + PS00001 + + This function raises a ValueError if the identifier does not exist: + + >>> handle = ExPASy.get_prosite_raw("DOES_NOT_EXIST") + Traceback (most recent call last): + ... + ValueError: Failed to find entry 'DOES_NOT_EXIST' on ExPASy + + """ + handle = _open("https://prosite.expasy.org/%s.txt" % id) + if handle.url == "https://www.expasy.org/": + raise ValueError("Failed to find entry '%s' on ExPASy" % id) from None + return handle + + +def get_sprot_raw(id): + """Get a text handle to a raw SwissProt entry at ExPASy. + + For an ID of XXX, fetches http://www.uniprot.org/uniprot/XXX.txt + (as per the https://www.expasy.org/expasy_urls.html documentation). + + >>> from Bio import ExPASy + >>> from Bio import SwissProt + >>> with ExPASy.get_sprot_raw("O23729") as handle: + ... record = SwissProt.read(handle) + ... + >>> print(record.entry_name) + CHS3_BROFI + + This function raises a ValueError if the identifier does not exist: + + >>> ExPASy.get_sprot_raw("DOES_NOT_EXIST") + Traceback (most recent call last): + ... + ValueError: Failed to find SwissProt entry 'DOES_NOT_EXIST' + + """ + try: + handle = _open("http://www.uniprot.org/uniprot/%s.txt" % id) + except HTTPError as exception: + if exception.code == 404: + raise ValueError("Failed to find SwissProt entry '%s'" % id) from None + else: + raise + return handle + + +def _open(url): + """Open URL and convert to text assuming UTF-8 encoding (PRIVATE).""" + handle = urlopen(url) + text_handle = io.TextIOWrapper(handle, encoding="UTF-8") + text_handle.url = handle.url + return text_handle diff --git a/code/lib/Bio/ExPASy/__pycache__/Enzyme.cpython-37.pyc b/code/lib/Bio/ExPASy/__pycache__/Enzyme.cpython-37.pyc new file mode 100644 index 0000000..095f873 Binary files /dev/null and b/code/lib/Bio/ExPASy/__pycache__/Enzyme.cpython-37.pyc differ diff --git a/code/lib/Bio/ExPASy/__pycache__/Prodoc.cpython-37.pyc b/code/lib/Bio/ExPASy/__pycache__/Prodoc.cpython-37.pyc new file mode 100644 index 0000000..90a27a5 Binary files /dev/null and b/code/lib/Bio/ExPASy/__pycache__/Prodoc.cpython-37.pyc differ diff --git a/code/lib/Bio/ExPASy/__pycache__/Prosite.cpython-37.pyc b/code/lib/Bio/ExPASy/__pycache__/Prosite.cpython-37.pyc new file mode 100644 index 0000000..4390c2b Binary files /dev/null and b/code/lib/Bio/ExPASy/__pycache__/Prosite.cpython-37.pyc differ diff --git a/code/lib/Bio/ExPASy/__pycache__/ScanProsite.cpython-37.pyc b/code/lib/Bio/ExPASy/__pycache__/ScanProsite.cpython-37.pyc new file mode 100644 index 0000000..3b41129 Binary files /dev/null and b/code/lib/Bio/ExPASy/__pycache__/ScanProsite.cpython-37.pyc differ diff --git a/code/lib/Bio/ExPASy/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/ExPASy/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..1e75ef7 Binary files /dev/null and b/code/lib/Bio/ExPASy/__pycache__/__init__.cpython-37.pyc differ diff --git a/code/lib/Bio/ExPASy/__pycache__/cellosaurus.cpython-37.pyc b/code/lib/Bio/ExPASy/__pycache__/cellosaurus.cpython-37.pyc new file mode 100644 index 0000000..6930c02 Binary files /dev/null and b/code/lib/Bio/ExPASy/__pycache__/cellosaurus.cpython-37.pyc differ diff --git a/code/lib/Bio/ExPASy/cellosaurus.py b/code/lib/Bio/ExPASy/cellosaurus.py new file mode 100644 index 0000000..8794cd3 --- /dev/null +++ b/code/lib/Bio/ExPASy/cellosaurus.py @@ -0,0 +1,188 @@ +# Copyright 2016 by Stephen Marshall. All rights reserved. +# This code is part of the Biopython distribution and governed by its +# license. 
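A sketch tying the raw-record helpers above to their parsers; both calls hit the live ExPASy/UniProt servers, and the accession is the one used in the doctests:

```python
from Bio import ExPASy
from Bio import SwissProt

with ExPASy.get_sprot_raw("O23729") as handle:
    record = SwissProt.read(handle)
print(record.entry_name, record.sequence_length)
```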
Please see the LICENSE file that should have been included +# as part of this package. + +"""Parser for the cellosaurus.txt file from ExPASy. + +See https://web.expasy.org/cellosaurus/ + +Tested with the release of Version 18 (July 2016). + +Functions: + - read Reads a file containing one cell line entry + - parse Reads a file containing multiple cell line entries + +Classes: + - Record Holds cell line data. + +Examples +-------- +You need to download the Cellosaurus database for this examples to +run, e.g. from ftp://ftp.expasy.org/databases/cellosaurus/cellosaurus.txt + + >> from Bio.ExPASy import cellosaurus + >> with open('cellosaurus.txt') as handle: + ... records = cellosaurus.parse(handle) + ... for record in records: + ... if 'Homo sapiens' in record['OX'][0]: + ... print(record['ID']) + ... + #15310-LN + #W7079 + (L)PC6 + 00136 + ... + +""" + + +def parse(handle): + """Parse cell line records. + + This function is for parsing cell line files containing multiple + records. + + Arguments: + - handle - handle to the file. + + """ + while True: + record = __read(handle) + if not record: + break + yield record + + +def read(handle): + """Read one cell line record. + + This function is for parsing cell line files containing + exactly one record. + + Arguments: + - handle - handle to the file. + + """ + record = __read(handle) + # We should have reached the end of the record by now + remainder = handle.read() + if remainder: + raise ValueError("More than one cell line record found") + return record + + +class Record(dict): + """Holds information from an ExPASy Cellosaurus record as a Python dictionary. + + Each record contains the following keys: + + --------- --------------------------- ---------------------- + Line code Content Occurrence in an entry + --------- --------------------------- ---------------------- + ID Identifier (cell line name) Once; starts an entry + AC Accession (CVCL_xxxx) Once + AS Secondary accession number(s) Optional; once + SY Synonyms Optional; once + DR Cross-references Optional; once or more + RX References identifiers Optional: once or more + WW Web pages Optional; once or more + CC Comments Optional; once or more + ST STR profile data Optional; once or more + DI Diseases Optional; once or more + OX Species of origin Once or more + HI Hierarchy Optional; once or more + OI Originate from same individual Optional; once or more + SX Sex (gender) of cell Optional; once + CA Category Once + // Terminator Once; ends an entry + + """ + + def __init__(self): + """Initialize the class.""" + dict.__init__(self) + self["ID"] = "" + self["AC"] = "" + self["AS"] = "" + self["SY"] = "" + self["DR"] = [] + self["RX"] = [] + self["WW"] = [] + self["CC"] = [] + self["ST"] = [] + self["DI"] = [] + self["OX"] = [] + self["HI"] = [] + self["OI"] = [] + self["SX"] = "" + self["CA"] = "" + + def __repr__(self): + if self["ID"]: + if self["AC"]: + return "%s (%s, %s)" % (self.__class__.__name__, self["ID"], self["AC"]) + else: + return "%s (%s)" % (self.__class__.__name__, self["ID"]) + else: + return "%s ( )" % (self.__class__.__name__) + + def __str__(self): + output = "ID: " + self["ID"] + output += " AC: " + self["AC"] + output += " AS: " + self["AS"] + output += " SY: " + self["SY"] + output += " DR: " + repr(self["DR"]) + output += " RX: " + repr(self["RX"]) + output += " WW: " + repr(self["WW"]) + output += " CC: " + repr(self["CC"]) + output += " ST: " + repr(self["ST"]) + output += " DI: " + repr(self["DI"]) + output += " OX: " + repr(self["OX"]) + output += " HI: " + 
repr(self["HI"]) + output += " OI: " + repr(self["OI"]) + output += " SX: " + self["SX"] + output += " CA: " + self["CA"] + return output + + +# Everything below is private + + +def __read(handle): + record = None + + for line in handle: + + key, value = line[:2], line[5:].rstrip() + if key == "ID": + record = Record() + record["ID"] = value + elif key in ["AC", "AS", "SY", "SX", "CA"]: + record[key] += value + elif key in [ + "AC", + "AS", + "SY", + "RX", + "WW", + "CC", + "ST", + "DI", + "OX", + "HI", + "OI", + "SX", + "CA", + ]: + record[key].append(value) + elif key == "DR": + k, v = value.split(";") + record["DR"].append((k.strip(), v.strip())) + elif key == "//": + if record: + return record + else: + continue + if record: + raise ValueError("Unexpected end of stream") diff --git a/code/lib/Bio/File.py b/code/lib/Bio/File.py new file mode 100644 index 0000000..5edec51 --- /dev/null +++ b/code/lib/Bio/File.py @@ -0,0 +1,609 @@ +# Copyright 1999 by Jeffrey Chang. All rights reserved. +# Copyright 2009-2018 by Peter Cock. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +"""Code for more fancy file handles. + +Bio.File defines private classes used in Bio.SeqIO and Bio.SearchIO for +indexing files. These are not intended for direct use. +""" + +import os +import contextlib +import itertools +import collections.abc + +from abc import ABC, abstractmethod + +try: + import sqlite3 +except ImportError: + # May be missing if Python was compiled from source without its dependencies + sqlite3 = None + + +@contextlib.contextmanager +def as_handle(handleish, mode="r", **kwargs): + r"""Context manager to ensure we are using a handle. + + Context manager for arguments that can be passed to SeqIO and AlignIO read, write, + and parse methods: either file objects or path-like objects (strings, pathlib.Path + instances, or more generally, anything that can be handled by the builtin 'open' + function). + + When given a path-like object, returns an open file handle to that path, with provided + mode, which will be closed when the manager exits. + + All other inputs are returned, and are *not* closed. + + Arguments: + - handleish - Either a file handle or path-like object (anything which can be + passed to the builtin 'open' function, such as str, bytes, + pathlib.Path, and os.DirEntry objects) + - mode - Mode to open handleish (used only if handleish is a string) + - kwargs - Further arguments to pass to open(...) + + Examples + -------- + >>> from Bio import File + >>> import os + >>> with File.as_handle('seqs.fasta', 'w') as fp: + ... fp.write('>test\nACGT') + ... + 10 + >>> fp.closed + True + + >>> handle = open('seqs.fasta', 'w') + >>> with File.as_handle(handle) as fp: + ... fp.write('>test\nACGT') + ... + 10 + >>> fp.closed + False + >>> fp.close() + >>> os.remove("seqs.fasta") # tidy up + + """ + try: + with open(handleish, mode, **kwargs) as fp: + yield fp + except TypeError: + yield handleish + + +def _open_for_random_access(filename): + """Open a file in binary mode, spot if it is BGZF format etc (PRIVATE). + + This functionality is used by the Bio.SeqIO and Bio.SearchIO index + and index_db functions. + + If the file is gzipped but not BGZF, a specific ValueError is raised. 
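A usage sketch for the Cellosaurus parser above, assuming a local cellosaurus.txt download as described in the module docstring:

```python
from Bio.ExPASy import cellosaurus

with open("cellosaurus.txt") as handle:
    for record in cellosaurus.parse(handle):
        if record["SX"] == "Female":          # SX holds the sex line, if any
            print(record["ID"], record["AC"])
```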
+    """
+    handle = open(filename, "rb")
+    magic = handle.read(2)
+    handle.seek(0)
+
+    if magic == b"\x1f\x8b":
+        # This is a gzipped file, but is it BGZF?
+        from . import bgzf
+
+        try:
+            # If it is BGZF, we support that
+            return bgzf.BgzfReader(mode="rb", fileobj=handle)
+        except ValueError as e:
+            assert "BGZF" in str(e)
+            # Not a BGZF file after all,
+            handle.close()
+            raise ValueError(
+                "Gzipped files are not suitable for indexing, "
+                "please use BGZF (blocked gzip format) instead."
+            ) from None
+
+    return handle
+
+
+# The rest of this file defines code used in Bio.SeqIO and Bio.SearchIO
+# for indexing
+
+
+class _IndexedSeqFileProxy(ABC):
+    """Abstract base class for file format specific random access (PRIVATE).
+
+    This is subclassed in both Bio.SeqIO for indexing as SeqRecord
+    objects, and in Bio.SearchIO for indexing QueryResult objects.
+
+    Subclasses for each file format should define '__iter__', 'get'
+    and optionally 'get_raw' methods.
+    """
+
+    @abstractmethod
+    def __iter__(self):
+        """Return (identifier, offset, length in bytes) tuples.
+
+        The length can be zero where it is not implemented or not
+        possible for a particular file format.
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def get(self, offset):
+        """Return parsed object for this entry."""
+        # Most file formats with self contained records can be handled by
+        # parsing StringIO(self.get_raw(offset).decode())
+        raise NotImplementedError
+
+    def get_raw(self, offset):
+        """Return the raw record from the file as a bytes string (if implemented).
+
+        If the key is not found, a KeyError exception is raised.
+
+        This may not have been implemented for all file formats.
+        """
+        # Should be done by each sub-class (if possible)
+        raise NotImplementedError("Not available for this file format.")
+
+
+class _IndexedSeqFileDict(collections.abc.Mapping):
+    """Read only dictionary interface to a sequential record file.
+
+    This code is used in both Bio.SeqIO for indexing as SeqRecord
+    objects, and in Bio.SearchIO for indexing QueryResult objects.
+
+    Keeps the keys and associated file offsets in memory, reads the file
+    to access entries as objects, parsing them on demand. This approach
+    is memory limited, but will work even with millions of records.
+
+    Note duplicate keys are not allowed. If this happens, a ValueError
+    exception is raised.
+
+    As used in Bio.SeqIO, by default the SeqRecord's id string is used
+    as the dictionary key. In Bio.SearchIO, the query's id string is
+    used. This can be changed by supplying an optional key_function,
+    a callback function which will be given the record id and must
+    return the desired key. For example, this allows you to parse
+    NCBI style FASTA identifiers, and extract the GI number to use
+    as the dictionary key.
+
+    Note that this dictionary is essentially read only. You cannot
+    add or change values, pop values, nor clear the dictionary.
+    """
+
+    def __init__(self, random_access_proxy, key_function, repr, obj_repr):
+        """Initialize the class."""
+        # Use key_function=None for default value
+        self._proxy = random_access_proxy
+        self._key_function = key_function
+        self._repr = repr
+        self._obj_repr = obj_repr
+        if key_function:
+            offset_iter = ((key_function(k), o, l) for (k, o, l) in random_access_proxy)
+        else:
+            offset_iter = random_access_proxy
+        offsets = {}
+        for key, offset, length in offset_iter:
+            # Note - we don't store the length because I want to minimise the
+            # memory requirements.
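The two calling conventions `as_handle` (shown above) supports can be contrasted in a short sketch; the filename is illustrative:

```python
from Bio import File

with File.as_handle("seqs.fasta", "w") as fp:   # path-like: closed on exit
    fp.write(">test\nACGT\n")

existing = open("seqs.fasta")
with File.as_handle(existing) as fp:            # already a handle: passed through
    print(fp.read().splitlines()[0])
print(existing.closed)                          # False - the caller still owns it
existing.close()
```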
With the SQLite backend the length is kept + # and is used to speed up the get_raw method (by about 3 times). + # The length should be provided by all the current backends except + # SFF where there is an existing Roche index we can reuse (very fast + # but lacks the record lengths) + # assert length or format in ["sff", "sff-trim"], \ + # "%s at offset %i given length %r (%s format %s)" \ + # % (key, offset, length, filename, format) + if key in offsets: + self._proxy._handle.close() + raise ValueError("Duplicate key '%s'" % key) + else: + offsets[key] = offset + self._offsets = offsets + + def __repr__(self): + """Return a string representation of the File object.""" + return self._repr + + def __str__(self): + """Create a string representation of the File object.""" + # TODO - How best to handle the __str__ for SeqIO and SearchIO? + if self: + return "{%r : %s(...), ...}" % (list(self.keys())[0], self._obj_repr) + else: + return "{}" + + def __len__(self): + """Return the number of records.""" + return len(self._offsets) + + def __iter__(self): + """Iterate over the keys.""" + return iter(self._offsets) + + def __getitem__(self, key): + """Return record for the specified key.""" + # Pass the offset to the proxy + record = self._proxy.get(self._offsets[key]) + if self._key_function: + key2 = self._key_function(record.id) + else: + key2 = record.id + if key != key2: + raise ValueError("Key did not match (%s vs %s)" % (key, key2)) + return record + + def get_raw(self, key): + """Return the raw record from the file as a bytes string. + + If the key is not found, a KeyError exception is raised. + """ + # Pass the offset to the proxy + return self._proxy.get_raw(self._offsets[key]) + + def close(self): + """Close the file handle being used to read the data. + + Once called, further use of the index won't work. The sole purpose + of this method is to allow explicit handle closure - for example + if you wish to delete the file, on Windows you must first close + all open handles to that file. + """ + self._proxy._handle.close() + + +class _SQLiteManySeqFilesDict(_IndexedSeqFileDict): + """Read only dictionary interface to many sequential record files. + + This code is used in both Bio.SeqIO for indexing as SeqRecord + objects, and in Bio.SearchIO for indexing QueryResult objects. + + Keeps the keys, file-numbers and offsets in an SQLite database. To access + a record by key, reads from the offset in the appropriate file and then + parses the record into an object. + + There are OS limits on the number of files that can be open at once, + so a pool are kept. If a record is required from a closed file, then + one of the open handles is closed first. + """ + + def __init__( + self, + index_filename, + filenames, + proxy_factory, + fmt, + key_function, + repr, + max_open=10, + ): + """Initialize the class.""" + # TODO? - Don't keep filename list in memory (just in DB)? + # Should save a chunk of memory if dealing with 1000s of files. 
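`_IndexedSeqFileDict` above is the machinery behind the public `Bio.SeqIO.index()` function; a sketch of that public entry point, with an illustrative filename:

```python
from Bio import SeqIO

index = SeqIO.index("seqs.fasta", "fasta")  # filename illustrative
print(len(index))                           # number of records indexed
first_key = next(iter(index))               # keys are held in memory...
record = index[first_key]                   # ...records are parsed on demand
raw = index.get_raw(first_key)              # raw bytes of the original record
index.close()
```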
+ # Furthermore could compare a generator to the DB on reloading + # (no need to turn it into a list) + + if sqlite3 is None: + # Python was compiled without sqlite3 support + from Bio import MissingPythonDependencyError + + raise MissingPythonDependencyError( + "Python was compiled without the sqlite3 module" + ) + if filenames is not None: + filenames = list(filenames) # In case it was a generator + + # Cache the arguments as private variables + self._index_filename = index_filename + self._filenames = filenames + self._format = fmt + self._key_function = key_function + self._proxy_factory = proxy_factory + self._repr = repr + self._max_open = max_open + self._proxies = {} + + # Note if using SQLite :memory: trick index filename, this will + # give $PWD as the relative path (which is fine). + self._relative_path = os.path.abspath(os.path.dirname(index_filename)) + + if os.path.isfile(index_filename): + self._load_index() + else: + self._build_index() + + def _load_index(self): + """Call from __init__ to re-use an existing index (PRIVATE).""" + index_filename = self._index_filename + relative_path = self._relative_path + filenames = self._filenames + fmt = self._format + proxy_factory = self._proxy_factory + + con = sqlite3.dbapi2.connect(index_filename, check_same_thread=False) + self._con = con + # Check the count... + try: + (count,) = con.execute( + "SELECT value FROM meta_data WHERE key=?;", ("count",) + ).fetchone() + self._length = int(count) + if self._length == -1: + con.close() + raise ValueError("Unfinished/partial database") from None + + # use MAX(_ROWID_) to obtain the number of sequences in the database + # using COUNT(key) is quite slow in SQLITE + # (https://stackoverflow.com/questions/8988915/sqlite-count-slow-on-big-tables) + (count,) = con.execute("SELECT MAX(_ROWID_) FROM offset_data;").fetchone() + if self._length != int(count): + con.close() + raise ValueError( + "Corrupt database? 
%i entries not %i" % (int(count), self._length) + ) from None + (self._format,) = con.execute( + "SELECT value FROM meta_data WHERE key=?;", ("format",) + ).fetchone() + if fmt and fmt != self._format: + con.close() + raise ValueError( + "Index file says format %s, not %s" % (self._format, fmt) + ) from None + try: + (filenames_relative_to_index,) = con.execute( + "SELECT value FROM meta_data WHERE key=?;", + ("filenames_relative_to_index",), + ).fetchone() + filenames_relative_to_index = ( + filenames_relative_to_index.upper() == "TRUE" + ) + except TypeError: + # Original behaviour, assume if meta_data missing + filenames_relative_to_index = False + self._filenames = [ + row[0] + for row in con.execute( + "SELECT name FROM file_data ORDER BY file_number;" + ).fetchall() + ] + if filenames_relative_to_index: + # Not implicitly relative to $PWD, explicitly relative to index file + relative_path = os.path.abspath(os.path.dirname(index_filename)) + tmp = [] + for f in self._filenames: + if os.path.isabs(f): + tmp.append(f) + else: + # Would be stored with Unix / path separator, so convert + # it to the local OS path separator here: + tmp.append( + os.path.join(relative_path, f.replace("/", os.path.sep)) + ) + self._filenames = tmp + del tmp + if filenames and len(filenames) != len(self._filenames): + con.close() + raise ValueError( + "Index file says %i files, not %i" + % (len(self._filenames), len(filenames)) + ) from None + if filenames and filenames != self._filenames: + for old, new in zip(self._filenames, filenames): + # Want exact match (after making relative to the index above) + if os.path.abspath(old) != os.path.abspath(new): + con.close() + if filenames_relative_to_index: + raise ValueError( + "Index file has different filenames, e.g. %r != %r" + % (os.path.abspath(old), os.path.abspath(new)) + ) from None + else: + raise ValueError( + "Index file has different filenames " + "[This is an old index where any relative paths " + "were relative to the original working directory]. " + "e.g. %r != %r" + % (os.path.abspath(old), os.path.abspath(new)) + ) from None + # Filenames are equal (after imposing abspath) + except sqlite3.OperationalError as err: + con.close() + raise ValueError("Not a Biopython index database? 
%s" % err) from None + # Now we have the format (from the DB if not given to us), + if not proxy_factory(self._format): + con.close() + raise ValueError("Unsupported format '%s'" % self._format) + + def _build_index(self): + """Call from __init__ to create a new index (PRIVATE).""" + index_filename = self._index_filename + relative_path = self._relative_path + filenames = self._filenames + fmt = self._format + key_function = self._key_function + proxy_factory = self._proxy_factory + max_open = self._max_open + random_access_proxies = self._proxies + + if not fmt or not filenames: + raise ValueError( + "Filenames to index and format required to build %r" % index_filename + ) + if not proxy_factory(fmt): + raise ValueError("Unsupported format '%s'" % fmt) + # Create the index + con = sqlite3.dbapi2.connect(index_filename) + self._con = con + # print("Creating index") + # Sqlite PRAGMA settings for speed + con.execute("PRAGMA synchronous=OFF") + con.execute("PRAGMA locking_mode=EXCLUSIVE") + # Don't index the key column until the end (faster) + # con.execute("CREATE TABLE offset_data (key TEXT PRIMARY KEY, " + # "offset INTEGER);") + con.execute("CREATE TABLE meta_data (key TEXT, value TEXT);") + con.execute("INSERT INTO meta_data (key, value) VALUES (?,?);", ("count", -1)) + con.execute("INSERT INTO meta_data (key, value) VALUES (?,?);", ("format", fmt)) + con.execute( + "INSERT INTO meta_data (key, value) VALUES (?,?);", + ("filenames_relative_to_index", "True"), + ) + # TODO - Record the file size and modified date? + con.execute("CREATE TABLE file_data (file_number INTEGER, name TEXT);") + con.execute( + "CREATE TABLE offset_data (key TEXT, " + "file_number INTEGER, offset INTEGER, length INTEGER);" + ) + count = 0 + for i, filename in enumerate(filenames): + # Default to storing as an absolute path, + f = os.path.abspath(filename) + if not os.path.isabs(filename) and not os.path.isabs(index_filename): + # Since user gave BOTH filename & index as relative paths, + # we will store this relative to the index file even though + # if it may now start ../ (meaning up a level) + # Note for cross platform use (e.g. shared drive over SAMBA), + # convert any Windows slash into Unix style for rel paths. + f = os.path.relpath(filename, relative_path).replace(os.path.sep, "/") + elif (os.path.dirname(os.path.abspath(filename)) + os.path.sep).startswith( + relative_path + os.path.sep + ): + # Since sequence file is in same directory or sub directory, + # might as well make this into a relative path: + f = os.path.relpath(filename, relative_path).replace(os.path.sep, "/") + assert not f.startswith("../"), f + # print("DEBUG - storing %r as [%r] %r" % (filename, relative_path, f)) + con.execute( + "INSERT INTO file_data (file_number, name) VALUES (?,?);", (i, f) + ) + random_access_proxy = proxy_factory(fmt, filename) + if key_function: + offset_iter = ( + (key_function(k), i, o, l) for (k, o, l) in random_access_proxy + ) + else: + offset_iter = ((k, i, o, l) for (k, o, l) in random_access_proxy) + while True: + batch = list(itertools.islice(offset_iter, 100)) + if not batch: + break + # print("Inserting batch of %i offsets, %s ... 
%s" + # % (len(batch), batch[0][0], batch[-1][0])) + con.executemany( + "INSERT INTO offset_data (key,file_number,offset,length) VALUES (?,?,?,?);", + batch, + ) + con.commit() + count += len(batch) + if len(random_access_proxies) < max_open: + random_access_proxies[i] = random_access_proxy + else: + random_access_proxy._handle.close() + self._length = count + # print("About to index %i entries" % count) + try: + con.execute( + "CREATE UNIQUE INDEX IF NOT EXISTS key_index ON offset_data(key);" + ) + except sqlite3.IntegrityError as err: + self._proxies = random_access_proxies + self.close() + con.close() + raise ValueError("Duplicate key? %s" % err) from None + con.execute("PRAGMA locking_mode=NORMAL") + con.execute("UPDATE meta_data SET value = ? WHERE key = ?;", (count, "count")) + con.commit() + # print("Index created") + + def __repr__(self): + return self._repr + + def __contains__(self, key): + return bool( + self._con.execute( + "SELECT key FROM offset_data WHERE key=?;", (key,) + ).fetchone() + ) + + def __len__(self): + """Return the number of records indexed.""" + return self._length + # return self._con.execute("SELECT COUNT(key) FROM offset_data;").fetchone()[0] + + def __iter__(self): + """Iterate over the keys.""" + for row in self._con.execute( + "SELECT key FROM offset_data ORDER BY file_number, offset;" + ): + yield str(row[0]) + + def __getitem__(self, key): + """Return record for the specified key.""" + # Pass the offset to the proxy + row = self._con.execute( + "SELECT file_number, offset FROM offset_data WHERE key=?;", (key,) + ).fetchone() + if not row: + raise KeyError + file_number, offset = row + proxies = self._proxies + if file_number in proxies: + record = proxies[file_number].get(offset) + else: + if len(proxies) >= self._max_open: + # Close an old handle... + proxies.popitem()[1]._handle.close() + # Open a new handle... + proxy = self._proxy_factory(self._format, self._filenames[file_number]) + record = proxy.get(offset) + proxies[file_number] = proxy + if self._key_function: + key2 = self._key_function(record.id) + else: + key2 = record.id + if key != key2: + raise ValueError("Key did not match (%s vs %s)" % (key, key2)) + return record + + def get_raw(self, key): + """Return the raw record from the file as a bytes string. + + If the key is not found, a KeyError exception is raised. + """ + # Pass the offset to the proxy + row = self._con.execute( + "SELECT file_number, offset, length FROM offset_data WHERE key=?;", (key,) + ).fetchone() + if not row: + raise KeyError + file_number, offset, length = row + proxies = self._proxies + if file_number in proxies: + if length: + # Shortcut if we have the length + h = proxies[file_number]._handle + h.seek(offset) + return h.read(length) + else: + return proxies[file_number].get_raw(offset) + else: + # This code is duplicated from __getitem__ to avoid a function call + if len(proxies) >= self._max_open: + # Close an old handle... + proxies.popitem()[1]._handle.close() + # Open a new handle... 
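The SQLite-backed class being built here powers `Bio.SeqIO.index_db()`; a sketch of that public API, with illustrative filenames:

```python
from Bio import SeqIO

# Build (or reopen) a persistent SQLite index over several sequence files
db = SeqIO.index_db("seqs.idx", ["part1.fasta", "part2.fasta"], "fasta")
print(len(db))        # count read back from the meta_data table
key = next(iter(db))  # keys stream from the offset_data table
record = db[key]      # file_number + offset resolved via SQL, then parsed
db.close()            # closes the pooled file handles
```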
+ proxy = self._proxy_factory(self._format, self._filenames[file_number]) + proxies[file_number] = proxy + if length: + # Shortcut if we have the length + h = proxy._handle + h.seek(offset) + return h.read(length) + else: + return proxy.get_raw(offset) + + def close(self): + """Close any open file handles.""" + proxies = self._proxies + while proxies: + proxies.popitem()[1]._handle.close() diff --git a/code/lib/Bio/GenBank/Record.py b/code/lib/Bio/GenBank/Record.py new file mode 100644 index 0000000..268efa8 --- /dev/null +++ b/code/lib/Bio/GenBank/Record.py @@ -0,0 +1,669 @@ +# This code is part of the Biopython distribution and governed by its +# license. Please see the LICENSE file that should have been included +# as part of this package. +# + +"""Hold GenBank data in a straightforward format. + +Classes: + - Record - All of the information in a GenBank record. + - Reference - hold reference data for a record. + - Feature - Hold the information in a Feature Table. + - Qualifier - Qualifiers on a Feature. + +""" + +import Bio.GenBank + + +def _wrapped_genbank(information, indent, wrap_space=1, split_char=" "): + """Write a line of GenBank info that can wrap over multiple lines (PRIVATE). + + This takes a line of information which can potentially wrap over + multiple lines, and breaks it up with carriage returns and + indentation so it fits properly into a GenBank record. + + Arguments: + - information - The string holding the information we want + wrapped in GenBank method. + - indent - The indentation on the lines we are writing. + - wrap_space - Whether or not to wrap only on spaces in the + information. + - split_char - A specific character to split the lines on. By default + spaces are used. + + """ + info_length = Record.GB_LINE_LENGTH - indent + + if not information: + # GenBank files use "." for missing data + return ".\n" + + if wrap_space: + info_parts = information.split(split_char) + else: + cur_pos = 0 + info_parts = [] + while cur_pos < len(information): + info_parts.append(information[cur_pos : cur_pos + info_length]) + cur_pos += info_length + + # first get the information string split up by line + output_parts = [] + cur_part = "" + for info_part in info_parts: + if len(cur_part) + 1 + len(info_part) > info_length: + if cur_part: + if split_char != " ": + cur_part += split_char + output_parts.append(cur_part) + cur_part = info_part + else: + if cur_part == "": + cur_part = info_part + else: + cur_part += split_char + info_part + + # add the last bit of information to the output + if cur_part: + output_parts.append(cur_part) + + # now format the information string for return + output_info = output_parts[0] + "\n" + for output_part in output_parts[1:]: + output_info += " " * indent + output_part + "\n" + + return output_info + + +def _indent_genbank(information, indent): + """Write out information with the specified indent (PRIVATE). + + Unlike _wrapped_genbank, this function makes no attempt to wrap + lines -- it assumes that the information already has newlines in the + appropriate places, and will add the specified indent to the start of + each line. + """ + # split the info into lines based on line breaks + info_parts = information.split("\n") + + # the first line will have no indent + output_info = info_parts[0] + "\n" + for info_part in info_parts[1:]: + output_info += " " * indent + info_part + "\n" + + return output_info + + +class Record: + """Hold GenBank information in a format similar to the original record. 
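The wrapping helper defined above can be exercised directly; this imports a private name purely for illustration:

```python
from Bio.GenBank.Record import _wrapped_genbank

info = "Homo sapiens chromosome 1 partial sequence with a fairly long definition line"
print(_wrapped_genbank(info, 12), end="")
# Continuation lines are indented 12 spaces so the text lines up under the
# GenBank keyword column; missing data would be written as "." instead.
```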
+
+    The Record class is meant to make data easy to get to when you are
+    just interested in looking at GenBank data.
+
+    Attributes:
+     - locus - The name specified after the LOCUS keyword in the GenBank
+       record. This may be the accession number, or a clone id or something else.
+     - size - The size of the record.
+     - residue_type - The type of residues making up the sequence in this
+       record. Normally something like RNA, DNA or PROTEIN, but may be as
+       esoteric as 'ss-RNA circular'.
+     - data_file_division - The division this record is stored under in
+       GenBank (ie. PLN -> plants; PRI -> humans, primates; BCT -> bacteria...)
+     - date - The date of submission of the record, in a form like '28-JUL-1998'
+     - accession - list of all accession numbers for the sequence.
+     - nid - Nucleotide identifier number.
+     - pid - Protein identifier number
+     - version - The accession number + version (ie. AB01234.2)
+     - db_source - Information about the database the record came from
+     - gi - The NCBI gi identifier for the record.
+     - keywords - A list of keywords related to the record.
+     - segment - If the record is one of a series, this is info about which
+       segment this record is (something like '1 of 6').
+     - source - The source of material where the sequence came from.
+     - organism - The genus and species of the organism (ie. 'Homo sapiens')
+     - taxonomy - A listing of the taxonomic classification of the organism,
+       starting general and getting more specific.
+     - references - A list of Reference objects.
+     - comment - Text with any kind of comment about the record.
+     - features - A listing of Features making up the feature table.
+     - base_counts - A string with the counts of bases for the sequence.
+     - origin - A string specifying info about the origin of the sequence.
+     - sequence - A string with the sequence itself.
+     - contig - A string of location information for a CONTIG in a RefSeq file
+     - project - The genome sequencing project numbers
+       (will be replaced by the dblink cross-references in 2009).
+     - dblinks - The genome sequencing project number(s) and other links.
+       (will replace the project information in 2009).
+ + """ + + # constants for outputting GenBank information + GB_LINE_LENGTH = 79 + GB_BASE_INDENT = 12 + GB_FEATURE_INDENT = 21 + GB_INTERNAL_INDENT = 2 + GB_OTHER_INTERNAL_INDENT = 3 + GB_FEATURE_INTERNAL_INDENT = 5 + GB_SEQUENCE_INDENT = 9 + + BASE_FORMAT = "%-" + str(GB_BASE_INDENT) + "s" + INTERNAL_FORMAT = ( + " " * GB_INTERNAL_INDENT + "%-" + str(GB_BASE_INDENT - GB_INTERNAL_INDENT) + "s" + ) + OTHER_INTERNAL_FORMAT = ( + " " * GB_OTHER_INTERNAL_INDENT + + "%-" + + str(GB_BASE_INDENT - GB_OTHER_INTERNAL_INDENT) + + "s" + ) + + BASE_FEATURE_FORMAT = "%-" + str(GB_FEATURE_INDENT) + "s" + INTERNAL_FEATURE_FORMAT = ( + " " * GB_FEATURE_INTERNAL_INDENT + + "%-" + + str(GB_FEATURE_INDENT - GB_FEATURE_INTERNAL_INDENT) + + "s" + ) + SEQUENCE_FORMAT = "%" + str(GB_SEQUENCE_INDENT) + "s" + + def __init__(self): + """Initialize the class.""" + self.accession = [] + self.base_counts = "" + self.comment = "" + self.contig = "" + self.data_file_division = "" + self.date = "" + self.db_source = "" + self.dblinks = [] + self.definition = "" + self.features = [] + self.gi = "" + self.keywords = [] + self.locus = "" + self.molecule_type = "" + self.nid = "" + self.organism = "" + self.origin = "" + self.pid = "" + self.primary = [] + self.projects = [] + self.references = [] + self.residue_type = "" + self.segment = "" + self.sequence = "" + self.size = "" + self.source = "" + self.taxonomy = [] + self.topology = "" + self.version = "" + self.wgs = "" + self.wgs_scafld = [] + + def __str__(self): + """Provide a GenBank formatted output option for a Record. + + The objective of this is to provide an easy way to read in a GenBank + record, modify it somehow, and then output it in 'GenBank format.' + We are striving to make this work so that a parsed Record that is + output using this function will look exactly like the original + record. 
+ + Much of the output is based on format description info at: + + ftp://ncbi.nlm.nih.gov/genbank/gbrel.txt + """ + output = self._locus_line() + output += self._definition_line() + output += self._accession_line() + output += self._version_line() + output += self._project_line() + output += self._dblink_line() + output += self._nid_line() + output += self._pid_line() + output += self._keywords_line() + output += self._db_source_line() + output += self._segment_line() + output += self._source_line() + output += self._organism_line() + for reference in self.references: + output += str(reference) + output += self._comment_line() + output += self._features_line() + for feature in self.features: + output += str(feature) + output += self._base_count_line() + output += self._origin_line() + output += self._sequence_line() + output += self._wgs_line() + output += self._wgs_scafld_line() + output += self._contig_line() + output += "//" + return output + + def _locus_line(self): + """Provide the output string for the LOCUS line (PRIVATE).""" + output = "LOCUS" + output += " " * 7 # 6-12 spaces + output += "%-9s" % self.locus + output += " " # 22 space + output += "%7s" % self.size + if "PROTEIN" in self.residue_type: + output += " aa" + else: + output += " bp " + + # treat circular types differently, since they'll have long residue + # types + if "circular" in self.residue_type: + output += "%17s" % self.residue_type + # second case: ss-DNA types of records + elif "-" in self.residue_type: + output += "%7s" % self.residue_type + output += " " * 10 # spaces for circular + else: + output += " " * 3 # spaces for stuff like ss- + output += "%-4s" % self.residue_type + output += " " * 10 # spaces for circular + + output += " " * 2 + output += "%3s" % self.data_file_division + output += " " * 7 # spaces for 56-63 + output += "%11s" % self.date + output += "\n" + return output + + def _definition_line(self): + """Provide output for the DEFINITION line (PRIVATE).""" + output = Record.BASE_FORMAT % "DEFINITION" + output += _wrapped_genbank(self.definition + ".", Record.GB_BASE_INDENT) + return output + + def _accession_line(self): + """Output for the ACCESSION line (PRIVATE).""" + if self.accession: + output = Record.BASE_FORMAT % "ACCESSION" + + acc_info = "" + for accession in self.accession: + acc_info += "%s " % accession + # strip off an extra space at the end + acc_info = acc_info.rstrip() + output += _wrapped_genbank(acc_info, Record.GB_BASE_INDENT) + else: + output = "" + + return output + + def _version_line(self): + """Output for the VERSION line (PRIVATE).""" + if self.version: + output = Record.BASE_FORMAT % "VERSION" + output += self.version + output += " GI:" + output += "%s\n" % self.gi + else: + output = "" + return output + + def _project_line(self): + output = "" + if len(self.projects) > 0: + output = Record.BASE_FORMAT % "PROJECT" + output += "%s\n" % " ".join(self.projects) + return output + + def _dblink_line(self): + output = "" + if len(self.dblinks) > 0: + output = Record.BASE_FORMAT % "DBLINK" + dblink_info = "\n".join(self.dblinks) + output += _wrapped_genbank(dblink_info, Record.GB_BASE_INDENT) + return output + + def _nid_line(self): + """Output for the NID line. Use of NID is obsolete in GenBank files (PRIVATE).""" + if self.nid: + output = Record.BASE_FORMAT % "NID" + output += "%s\n" % self.nid + else: + output = "" + return output + + def _pid_line(self): + """Output for PID line. 
Presumedly, PID usage is also obsolete (PRIVATE).""" + if self.pid: + output = Record.BASE_FORMAT % "PID" + output += "%s\n" % self.pid + else: + output = "" + return output + + def _keywords_line(self): + """Output for the KEYWORDS line (PRIVATE).""" + output = "" + if self.keywords: + output += Record.BASE_FORMAT % "KEYWORDS" + keyword_info = "" + for keyword in self.keywords: + keyword_info += "%s; " % keyword + # replace the ; at the end with a period + keyword_info = keyword_info[:-2] + keyword_info += "." + + output += _wrapped_genbank(keyword_info, Record.GB_BASE_INDENT) + + return output + + def _db_source_line(self): + """Output for DBSOURCE line (PRIVATE).""" + if self.db_source: + output = Record.BASE_FORMAT % "DBSOURCE" + output += "%s\n" % self.db_source + else: + output = "" + return output + + def _segment_line(self): + """Output for the SEGMENT line (PRIVATE).""" + output = "" + if self.segment: + output += Record.BASE_FORMAT % "SEGMENT" + output += _wrapped_genbank(self.segment, Record.GB_BASE_INDENT) + return output + + def _source_line(self): + """Output for SOURCE line on where the sample came from (PRIVATE).""" + output = Record.BASE_FORMAT % "SOURCE" + output += _wrapped_genbank(self.source, Record.GB_BASE_INDENT) + return output + + def _organism_line(self): + """Output for ORGANISM line with taxonomy info (PRIVATE).""" + output = Record.INTERNAL_FORMAT % "ORGANISM" + # Now that species names can be too long, this line can wrap (Bug 2591) + output += _wrapped_genbank(self.organism, Record.GB_BASE_INDENT) + output += " " * Record.GB_BASE_INDENT + taxonomy_info = "" + for tax in self.taxonomy: + taxonomy_info += "%s; " % tax + # replace the ; at the end with a period + taxonomy_info = taxonomy_info[:-2] + taxonomy_info += "." + output += _wrapped_genbank(taxonomy_info, Record.GB_BASE_INDENT) + + return output + + def _comment_line(self): + """Output for the COMMENT lines (PRIVATE).""" + output = "" + if self.comment: + output += Record.BASE_FORMAT % "COMMENT" + output += _indent_genbank(self.comment, Record.GB_BASE_INDENT) + return output + + def _features_line(self): + """Output for the FEATURES line (PRIVATE).""" + output = "" + if len(self.features) > 0: + output += Record.BASE_FEATURE_FORMAT % "FEATURES" + output += "Location/Qualifiers\n" + return output + + def _base_count_line(self): + """Output for the BASE COUNT line with base information (PRIVATE).""" + output = "" + if self.base_counts: + output += Record.BASE_FORMAT % "BASE COUNT " + # split up the base counts into their individual parts + count_parts = self.base_counts.split(" ") + while "" in count_parts: + count_parts.remove("") + # deal with the standard case, with a normal origin line + # like: 474 a 356 c 428 g 364 t + if len(count_parts) % 2 == 0: + while len(count_parts) > 0: + count_info = count_parts.pop(0) + count_type = count_parts.pop(0) + + output += "%7s %s" % (count_info, count_type) + # deal with ugly ORIGIN lines like: + # 1311257 a2224835 c2190093 g1309889 t + # by just outputting the raw information + else: + output += self.base_counts + output += "\n" + return output + + def _origin_line(self): + """Output for the ORIGIN line (PRIVATE).""" + output = "" + # only output the ORIGIN line if we have a sequence + if self.sequence: + output += Record.BASE_FORMAT % "ORIGIN" + if self.origin: + output += _wrapped_genbank(self.origin, Record.GB_BASE_INDENT) + else: + output += "\n" + return output + + def _sequence_line(self): + """Output for all of the sequence (PRIVATE).""" + output = "" + if 
self.sequence: + cur_seq_pos = 0 + while cur_seq_pos < len(self.sequence): + output += Record.SEQUENCE_FORMAT % str(cur_seq_pos + 1) + + for section in range(6): + start_pos = cur_seq_pos + section * 10 + end_pos = start_pos + 10 + seq_section = self.sequence[start_pos:end_pos] + output += " %s" % seq_section.lower() + + # stop looping if we are out of sequence + if end_pos > len(self.sequence): + break + + output += "\n" + cur_seq_pos += 60 + return output + + def _wgs_line(self): + output = "" + if self.wgs: + output += Record.BASE_FORMAT % "WGS" + output += self.wgs + return output + + def _wgs_scafld_line(self): + output = "" + if self.wgs_scafld: + output += Record.BASE_FORMAT % "WGS_SCAFLD" + output += self.wgs_scafld + return output + + def _contig_line(self): + """Output for CONTIG location information from RefSeq (PRIVATE).""" + output = "" + if self.contig: + output += Record.BASE_FORMAT % "CONTIG" + output += _wrapped_genbank( + self.contig, Record.GB_BASE_INDENT, split_char="," + ) + return output + + +class Reference: + """Hold information from a GenBank reference. + + Attributes: + - number - The number of the reference in the listing of references. + - bases - The bases in the sequence the reference refers to. + - authors - String with all of the authors. + - consrtm - Consortium the authors belong to. + - title - The title of the reference. + - journal - Information about the journal where the reference appeared. + - medline_id - The medline id for the reference. + - pubmed_id - The pubmed_id for the reference. + - remark - Free-form remarks about the reference. + + """ + + def __init__(self): + """Initialize the class.""" + self.number = "" + self.bases = "" + self.authors = "" + self.consrtm = "" + self.title = "" + self.journal = "" + self.medline_id = "" + self.pubmed_id = "" + self.remark = "" + + def __str__(self): + """Convert the reference to a GenBank format string.""" + output = self._reference_line() + output += self._authors_line() + output += self._consrtm_line() + output += self._title_line() + output += self._journal_line() + output += self._medline_line() + output += self._pubmed_line() + output += self._remark_line() + + return output + + def _reference_line(self): + """Output for REFERENCE lines (PRIVATE).""" + output = Record.BASE_FORMAT % "REFERENCE" + if self.number: + if self.bases: + output += "%-3s" % self.number + output += "%s" % self.bases + else: + output += "%s" % self.number + + output += "\n" + return output + + def _authors_line(self): + """Output for AUTHORS information (PRIVATE).""" + output = "" + if self.authors: + output += Record.INTERNAL_FORMAT % "AUTHORS" + output += _wrapped_genbank(self.authors, Record.GB_BASE_INDENT) + return output + + def _consrtm_line(self): + """Output for CONSRTM information (PRIVATE).""" + output = "" + if self.consrtm: + output += Record.INTERNAL_FORMAT % "CONSRTM" + output += _wrapped_genbank(self.consrtm, Record.GB_BASE_INDENT) + return output + + def _title_line(self): + """Output for TITLE information (PRIVATE).""" + output = "" + if self.title: + output += Record.INTERNAL_FORMAT % "TITLE" + output += _wrapped_genbank(self.title, Record.GB_BASE_INDENT) + return output + + def _journal_line(self): + """Output for JOURNAL information (PRIVATE).""" + output = "" + if self.journal: + output += Record.INTERNAL_FORMAT % "JOURNAL" + output += _wrapped_genbank(self.journal, Record.GB_BASE_INDENT) + return output + + def _medline_line(self): + """Output for MEDLINE information (PRIVATE).""" + output = "" + if 
self.medline_id: + output += Record.INTERNAL_FORMAT % "MEDLINE" + output += self.medline_id + "\n" + return output + + def _pubmed_line(self): + """Output for PUBMED information (PRIVATE).""" + output = "" + if self.pubmed_id: + output += Record.OTHER_INTERNAL_FORMAT % "PUBMED" + output += self.pubmed_id + "\n" + return output + + def _remark_line(self): + """Output for REMARK information (PRIVATE).""" + output = "" + if self.remark: + output += Record.INTERNAL_FORMAT % "REMARK" + output += _wrapped_genbank(self.remark, Record.GB_BASE_INDENT) + return output + + +class Feature: + """Hold information about a Feature in the Feature Table of a GenBank record. + + Attributes: + - key - The key name of the feature (i.e. source) + - location - The string specifying the location of the feature. + - qualifiers - A list of Qualifier objects in the feature. + + """ + + def __init__(self, key="", location=""): + """Initialize the class.""" + self.key = key + self.location = location + self.qualifiers = [] + + def __repr__(self): + """Representation of the object for debugging or logging.""" + return "Feature(key=%r, location=%r)" % (self.key, self.location) + + def __str__(self): + """Return feature as a GenBank format string.""" + output = Record.INTERNAL_FEATURE_FORMAT % self.key + output += _wrapped_genbank( + self.location, Record.GB_FEATURE_INDENT, split_char="," + ) + for qualifier in self.qualifiers: + output += str(qualifier) + return output + + +class Qualifier: + """Hold information about a qualifier in a GenBank feature. + + Attributes: + - key - The key name of the qualifier (i.e. /organism=) + - value - The value of the qualifier ("Dictyostelium discoideum"). + + """ + + def __init__(self, key="", value=""): + """Initialize the class.""" + self.key = key + self.value = value + + def __repr__(self): + """Representation of the object for debugging or logging.""" + return "Qualifier(key=%r, value=%r)" % (self.key, self.value) + + def __str__(self): + """Return feature qualifier as a GenBank format string.""" + output = " " * Record.GB_FEATURE_INDENT + # determine whether we can wrap on spaces + space_wrap = 1 + for no_space_key in Bio.GenBank._BaseGenBankConsumer.remove_space_keys: + if no_space_key in self.key: + space_wrap = 0 + # return double quotes as-is, leave it to the user to escape them + return output + _wrapped_genbank( + self.key + self.value, Record.GB_FEATURE_INDENT, space_wrap + ) diff --git a/code/lib/Bio/GenBank/Scanner.py b/code/lib/Bio/GenBank/Scanner.py new file mode 100644 index 0000000..2d94b4c --- /dev/null +++ b/code/lib/Bio/GenBank/Scanner.py @@ -0,0 +1,1904 @@ +# Copyright 2007-2017 by Peter Cock. All rights reserved. +# Revisions copyright 2010 by Uri Laserson. All rights reserved. +# This code is part of the Biopython distribution and governed by its +# license. Please see the LICENSE file that should have been included +# as part of this package. +"""Internal code for parsing GenBank and EMBL files (PRIVATE). + +This code is NOT intended for direct use. It provides a basic scanner +(for use with an event consumer such as Bio.GenBank._FeatureConsumer) +to parse a GenBank or EMBL file (with their shared INSDC feature table).
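+ + A minimal illustrative sketch of direct use (the file name is hypothetical, and real code should normally go through Bio.SeqIO instead): + + from Bio.GenBank.Scanner import GenBankScanner + with open("example.gb") as handle: + record = GenBankScanner(debug=0).parse(handle) # returns a SeqRecord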
+ + It is used by Bio.GenBank to parse GenBank files. + It is also used by Bio.SeqIO to parse GenBank and EMBL files. + + Feature Table Documentation: + + - http://www.insdc.org/files/feature_table.html + - http://www.ncbi.nlm.nih.gov/projects/collab/FT/index.html + - ftp://ftp.ncbi.nih.gov/genbank/docs/ + """ + # 17-MAR-2009: added wgs, wgs_scafld for GenBank whole genome shotgun master records. + # These are GenBank files that summarize the content of a project, and provide lists of + # scaffold and contig files in the project. These will be in annotations['wgs'] and + # annotations['wgs_scafld']. These GenBank files do not have sequences. See + # http://groups.google.com/group/bionet.molbio.genbank/browse_thread/thread/51fb88bf39e7dc36 + # http://is.gd/nNgk + # for more details of this format, and an example. + # Added by Ying Huang & Iddo Friedberg + + +import warnings +import re +import sys +from collections import OrderedDict + +from Bio.File import as_handle +from Bio.Seq import Seq +from Bio.SeqRecord import SeqRecord +from Bio import BiopythonParserWarning + + +class InsdcScanner: + """Basic functions for breaking up a GenBank/EMBL file into sub sections. + + The International Nucleotide Sequence Database Collaboration (INSDC) + is a collaboration between the DDBJ, EMBL, and GenBank. These organisations all use the + same "Feature Table" layout in their plain text flat file formats. + + However, the header and sequence sections of an EMBL file are very + different in layout to those produced by GenBank/DDBJ. + """ + + # These constants get redefined with sensible values in the sub classes: + RECORD_START = "XXX" # "LOCUS " or "ID " + HEADER_WIDTH = 3 # 12 or 5 + FEATURE_START_MARKERS = ["XXX***FEATURES***XXX"] + FEATURE_END_MARKERS = ["XXX***END FEATURES***XXX"] + FEATURE_QUALIFIER_INDENT = 0 + FEATURE_QUALIFIER_SPACER = "" + SEQUENCE_HEADERS = ["XXX"] # with right hand side spaces removed + + def __init__(self, debug=0): + """Initialize the class.""" + assert len(self.RECORD_START) == self.HEADER_WIDTH + for marker in self.SEQUENCE_HEADERS: + assert marker == marker.rstrip() + assert len(self.FEATURE_QUALIFIER_SPACER) == self.FEATURE_QUALIFIER_INDENT + self.debug = debug + self.handle = None + self.line = None + + def set_handle(self, handle): + """Set the handle attribute.""" + self.handle = handle + self.line = "" + + def find_start(self): + """Read in lines until we find the ID/LOCUS line, which is returned. + + Any preamble (such as the header used by the NCBI on ``*.seq.gz`` archives) + will be ignored. + """ + while True: + if self.line: + line = self.line + self.line = "" + else: + line = self.handle.readline() + if not line: + if self.debug: + print("End of file") + return None + if isinstance(line[0], int): + # Same exception as for FASTQ files + raise ValueError("Is this handle in binary mode not text mode?") + if line[: self.HEADER_WIDTH] == self.RECORD_START: + if self.debug > 1: + print("Found the start of a record:\n" + line) + break + line = line.rstrip() + if line == "//": + if self.debug > 1: + print("Skipping // marking end of last record") + elif line == "": + if self.debug > 1: + print("Skipping blank line before record") + else: + # Ignore any header before the first ID/LOCUS line. + if self.debug > 1: + print("Skipping header line before record:\n" + line) + self.line = line + return line + + def parse_header(self): + """Return list of strings making up the header. + + New line characters are removed. + + Assumes you have just read in the ID/LOCUS line.
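+ + Note the ID/LOCUS line itself is not included in the returned list; it is handled separately via _feed_first_line(), so for a GenBank file the result is e.g. ['DEFINITION ...', 'ACCESSION ...', ...] (illustrative values only).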
+ """ + if self.line[: self.HEADER_WIDTH] != self.RECORD_START: + raise ValueError("Not at start of record") + + header_lines = [] + while True: + line = self.handle.readline() + if not line: + raise ValueError("Premature end of line during sequence data") + line = line.rstrip() + if line in self.FEATURE_START_MARKERS: + if self.debug: + print("Found feature table") + break + # if line[:self.HEADER_WIDTH]==self.FEATURE_START_MARKER[:self.HEADER_WIDTH]: + # if self.debug : print("Found header table (?)") + # break + if line[: self.HEADER_WIDTH].rstrip() in self.SEQUENCE_HEADERS: + if self.debug: + print("Found start of sequence") + break + if line == "//": + raise ValueError("Premature end of sequence data marker '//' found") + header_lines.append(line) + self.line = line + return header_lines + + def parse_features(self, skip=False): + """Return list of tuples for the features (if present). + + Each feature is returned as a tuple (key, location, qualifiers) + where key and location are strings (e.g. "CDS" and + "complement(join(490883..490885,1..879))") while qualifiers + is a list of two string tuples (feature qualifier keys and values). + + Assumes you have already read to the start of the features table. + """ + if self.line.rstrip() not in self.FEATURE_START_MARKERS: + if self.debug: + print("Didn't find any feature table") + return [] + + while self.line.rstrip() in self.FEATURE_START_MARKERS: + self.line = self.handle.readline() + + features = [] + line = self.line + while True: + if not line: + raise ValueError("Premature end of line during features table") + if line[: self.HEADER_WIDTH].rstrip() in self.SEQUENCE_HEADERS: + if self.debug: + print("Found start of sequence") + break + line = line.rstrip() + if line == "//": + raise ValueError("Premature end of features table, marker '//' found") + if line in self.FEATURE_END_MARKERS: + if self.debug: + print("Found end of features") + line = self.handle.readline() + break + if line[2 : self.FEATURE_QUALIFIER_INDENT].strip() == "": + # This is an empty feature line between qualifiers. Empty + # feature lines within qualifiers are handled below (ignored). + line = self.handle.readline() + continue + if len(line) < self.FEATURE_QUALIFIER_INDENT: + warnings.warn( + "line too short to contain a feature: %r" % line, + BiopythonParserWarning, + ) + line = self.handle.readline() + continue + + if skip: + line = self.handle.readline() + while ( + line[: self.FEATURE_QUALIFIER_INDENT] + == self.FEATURE_QUALIFIER_SPACER + ): + line = self.handle.readline() + else: + # Build up a list of the lines making up this feature: + if ( + line[self.FEATURE_QUALIFIER_INDENT] != " " + and " " in line[self.FEATURE_QUALIFIER_INDENT :] + ): + # The feature table design enforces a length limit on the feature keys. + # Some third party files (e.g. IGMT's EMBL like files) solve this by + # over indenting the location and qualifiers. + feature_key, line = line[2:].strip().split(None, 1) + feature_lines = [line] + warnings.warn( + "Over indented %s feature?" % feature_key, + BiopythonParserWarning, + ) + else: + feature_key = line[2 : self.FEATURE_QUALIFIER_INDENT].strip() + feature_lines = [line[self.FEATURE_QUALIFIER_INDENT :]] + line = self.handle.readline() + while line[ + : self.FEATURE_QUALIFIER_INDENT + ] == self.FEATURE_QUALIFIER_SPACER or ( + line != "" and line.rstrip() == "" + ): # cope with blank lines in the midst of a feature + # Use strip to remove any harmless trailing white space AND and leading + # white space (e.g. 
out of spec files with too much indentation) + feature_lines.append(line[self.FEATURE_QUALIFIER_INDENT :].strip()) + line = self.handle.readline() + features.append(self.parse_feature(feature_key, feature_lines)) + self.line = line + return features + + def parse_feature(self, feature_key, lines): + r"""Parse a feature given as a list of strings into a tuple. + + Expects a feature as a list of strings, returns a tuple (key, location, + qualifiers) + + For example given this GenBank feature:: + + CDS complement(join(490883..490885,1..879)) + /locus_tag="NEQ001" + /note="conserved hypothetical [Methanococcus jannaschii]; + COG1583:Uncharacterized ACR; IPR001472:Bipartite nuclear + localization signal; IPR002743: Protein of unknown + function DUF57" + /codon_start=1 + /transl_table=11 + /product="hypothetical protein" + /protein_id="NP_963295.1" + /db_xref="GI:41614797" + /db_xref="GeneID:2732620" + /translation="MRLLLELKALNSIDKKQLSNYLIQGFIYNILKNTEYSWLHNWKK + EKYFNFTLIPKKDIIENKRYYLIISSPDKRFIEVLHNKIKDLDIITIGLAQFQLRKTK + KFDPKLRFPWVTITPIVLREGKIVILKGDKYYKVFVKRLEELKKYNLIKKKEPILEEP + IEISLNQIKDGWKIIDVKDRYYDFRNKSFSAFSNWLRDLKEQSLRKYNNFCGKNFYFE + EAIFEGFTFYKTVSIRIRINRGEAVYIGTLWKELNVYRKLDKEEREFYKFLYDCGLGS + LNSMGFGFVNTKKNSAR" + + Then should give input key="CDS" and the rest of the data as a list of strings + lines=["complement(join(490883..490885,1..879))", ..., "LNSMGFGFVNTKKNSAR"] + where the leading spaces and trailing newlines have been removed. + + Returns tuple containing: (key as string, location string, qualifiers as list) + as follows for this example: + + key = "CDS", string + location = "complement(join(490883..490885,1..879))", string + qualifiers = list of string tuples: + + [('locus_tag', '"NEQ001"'), + ('note', '"conserved hypothetical [Methanococcus jannaschii];\nCOG1583:..."'), + ('codon_start', '1'), + ('transl_table', '11'), + ('product', '"hypothetical protein"'), + ('protein_id', '"NP_963295.1"'), + ('db_xref', '"GI:41614797"'), + ('db_xref', '"GeneID:2732620"'), + ('translation', '"MRLLLELKALNSIDKKQLSNYLIQGFIYNILKNTEYSWLHNWKK\nEKYFNFT..."')] + + In the above example, the "note" and "translation" were edited for compactness, + and they would contain multiple new line characters (displayed above as \n) + + If a qualifier is quoted (in this case, everything except codon_start and + transl_table) then the quotes are NOT removed. + + Note that no whitespace is removed. + """ + # Skip any blank lines + iterator = (x for x in lines if x) + try: + line = next(iterator) + + feature_location = line.strip() + while feature_location[-1:] == ",": + # Multiline location, still more to come! 
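+ # e.g. a location such as "join(complement(123..456)," wrapped after + # the comma, with the remainder on the following line(s):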
+ line = next(iterator) + feature_location += line.strip() + if feature_location.count("(") > feature_location.count(")"): + # Including the prev line in warning would be more explicit, + # but this way get one-and-only-one warning shown by default: + warnings.warn( + "Non-standard feature line wrapping (didn't break on comma)?", + BiopythonParserWarning, + ) + while feature_location[-1:] == "," or feature_location.count( + "(" + ) > feature_location.count(")"): + line = next(iterator) + feature_location += line.strip() + + qualifiers = [] + + for line_number, line in enumerate(iterator): + # check for extra wrapping of the location closing parentheses + if line_number == 0 and line.startswith(")"): + feature_location += line.strip() + elif line[0] == "/": + # New qualifier + i = line.find("=") + key = line[1:i] # does not work if i==-1 + value = line[i + 1 :] # we ignore 'value' if i==-1 + if i and value.startswith(" ") and value.lstrip().startswith('"'): + warnings.warn( + "White space after equals in qualifier", + BiopythonParserWarning, + ) + value = value.lstrip() + if i == -1: + # Qualifier with no key, e.g. /pseudo + key = line[1:] + qualifiers.append((key, None)) + elif not value: + # ApE can output /note= + qualifiers.append((key, "")) + elif value == '"': + # One single quote + if self.debug: + print("Single quote %s:%s" % (key, value)) + # DO NOT remove the quote... + qualifiers.append((key, value)) + elif value[0] == '"': + # Quoted... + value_list = [value] + while value_list[-1][-1] != '"': + value_list.append(next(iterator)) + value = "\n".join(value_list) + # DO NOT remove the quotes... + qualifiers.append((key, value)) + else: + # Unquoted + # if debug : print("Unquoted line %s:%s" % (key,value)) + qualifiers.append((key, value)) + else: + # Unquoted continuation + assert len(qualifiers) > 0 + assert key == qualifiers[-1][0] + # if debug : print("Unquoted Cont %s:%s" % (key, line)) + if qualifiers[-1][1] is None: + raise StopIteration + qualifiers[-1] = (key, qualifiers[-1][1] + "\n" + line) + return feature_key, feature_location, qualifiers + except StopIteration: + # Bummer + raise ValueError( + "Problem with '%s' feature:\n%s" % (feature_key, "\n".join(lines)) + ) from None + + def parse_footer(self): + """Return a tuple containing a list of any misc strings, and the sequence.""" + # This is a basic bit of code to scan and discard the sequence, + # which was useful when developing the sub classes. + if self.line in self.FEATURE_END_MARKERS: + while self.line[: self.HEADER_WIDTH].rstrip() not in self.SEQUENCE_HEADERS: + self.line = self.handle.readline() + if not self.line: + raise ValueError("Premature end of file") + self.line = self.line.rstrip() + + if self.line[: self.HEADER_WIDTH].rstrip() not in self.SEQUENCE_HEADERS: + raise ValueError("Not at start of sequence") + while True: + line = self.handle.readline() + if not line: + raise ValueError("Premature end of line during sequence data") + line = line.rstrip() + if line == "//": + break + self.line = line + return [], "" # Dummy values! + + def _feed_first_line(self, consumer, line): + """Handle the LOCUS/ID line, passing data to the consumer (PRIVATE). + + This should be implemented by the EMBL / GenBank specific subclass + + Used by the parse_records() and parse() methods. + """ + pass + + def _feed_header_lines(self, consumer, lines): + """Handle the header lines (list of strings), passing data to the consumer (PRIVATE).
+ + This should be implemented by the EMBL / GenBank specific subclass + + Used by the parse_records() and parse() methods. + """ + pass + + @staticmethod + def _feed_feature_table(consumer, feature_tuples): + """Handle the feature table (list of tuples), passing data to the consumer (PRIVATE). + + Used by the parse_records() and parse() methods. + """ + consumer.start_feature_table() + for feature_key, location_string, qualifiers in feature_tuples: + consumer.feature_key(feature_key) + consumer.location(location_string) + for q_key, q_value in qualifiers: + if q_value is None: + consumer.feature_qualifier(q_key, q_value) + else: + consumer.feature_qualifier(q_key, q_value.replace("\n", " ")) + + def _feed_misc_lines(self, consumer, lines): + """Handle any lines between features and sequence (list of strings), passing data to the consumer (PRIVATE). + + This should be implemented by the EMBL / GenBank specific subclass + + Used by the parse_records() and parse() methods. + """ + pass + + def feed(self, handle, consumer, do_features=True): + """Feed a set of data into the consumer. + + This method is intended for use with the "old" code in Bio.GenBank + + Arguments: + - handle - A handle with the information to parse. + - consumer - The consumer that should be informed of events. + - do_features - Boolean, should the features be parsed? + Skipping the features can be much faster. + + Return values: + - true - Passed a record + - false - Did not find a record + + """ + # Should work with both EMBL and GenBank files provided the + # equivalent Bio.GenBank._FeatureConsumer methods are called... + self.set_handle(handle) + if not self.find_start(): + # Could not find (another) record + consumer.data = None + return False + + # We use the above class methods to parse the file into a simplified format. + # The first line, header lines and any misc lines after the features will be + # dealt with by GenBank / EMBL specific derived classes. + + # First line and header: + self._feed_first_line(consumer, self.line) + self._feed_header_lines(consumer, self.parse_header()) + + # Features (common to both EMBL and GenBank): + if do_features: + self._feed_feature_table(consumer, self.parse_features(skip=False)) + else: + self.parse_features(skip=True) # ignore the data + + # Footer and sequence + misc_lines, sequence_string = self.parse_footer() + self._feed_misc_lines(consumer, misc_lines) + + consumer.sequence(sequence_string) + # Calls to consumer.base_number() do nothing anyway + consumer.record_end("//") + + assert self.line == "//" + + # And we are done + return True + + def parse(self, handle, do_features=True): + """Return a SeqRecord (with SeqFeatures if do_features=True). + + See also the method parse_records() for use on multi-record files. + """ + from Bio.GenBank import _FeatureConsumer + from Bio.GenBank.utils import FeatureValueCleaner + + consumer = _FeatureConsumer( + use_fuzziness=1, feature_cleaner=FeatureValueCleaner() + ) + + if self.feed(handle, consumer, do_features): + return consumer.data + else: + return None + + def parse_records(self, handle, do_features=True): + """Parse records, return a SeqRecord object iterator.
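+ + A minimal usage sketch (hypothetical file name; in practice this is reached via Bio.SeqIO with the "genbank" or "embl" formats): + + with open("example.gbk") as handle: + for record in GenBankScanner().parse_records(handle): + print(record.id)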
+ + Each record (from the ID/LOCUS line to the // line) becomes a SeqRecord + + The SeqRecord objects include SeqFeatures if do_features=True + + This method is intended for use in Bio.SeqIO + """ + # This is a generator function + with as_handle(handle) as handle: + while True: + record = self.parse(handle, do_features) + if record is None: + break + if record.id is None: + raise ValueError( + "Failed to parse the record's ID. Invalid ID line?" + ) + if record.name == "": + raise ValueError( + "Failed to parse the record's name. Invalid ID line?" + ) + if record.description == "": + raise ValueError("Failed to parse the record's description") + yield record + + def parse_cds_features( + self, handle, alphabet=None, tags2id=("protein_id", "locus_tag", "product"), + ): + """Parse CDS features, return SeqRecord object iterator. + + Each CDS feature becomes a SeqRecord. + + Arguments: + - alphabet - Obsolete, should be left as None. + - tags2id - Tuple of three strings, the feature keys to use + for the record id, name and description. + + This method is intended for use in Bio.SeqIO + + """ + if alphabet is not None: + raise ValueError("The alphabet argument is no longer supported") + with as_handle(handle) as handle: + self.set_handle(handle) + while self.find_start(): + # Got an EMBL or GenBank record... + self.parse_header() # ignore header lines! + feature_tuples = self.parse_features() + # self.parse_footer() # ignore footer lines! + while True: + line = self.handle.readline() + if not line: + break + if line[:2] == "//": + break + self.line = line.rstrip() + + # Now go through those features... + for key, location_string, qualifiers in feature_tuples: + if key == "CDS": + # Create SeqRecord + # ================ + # SeqRecord objects cannot be created with annotations, they + # must be added afterwards. So create an empty record and + # then populate it: + record = SeqRecord(seq=None) + annotations = record.annotations + annotations["molecule_type"] = "protein" + # Should we add a location object to the annotations? + # I *think* that only makes sense for SeqFeatures with their + # sub features... + annotations["raw_location"] = location_string.replace(" ", "") + + for (qualifier_name, qualifier_data) in qualifiers: + if ( + qualifier_data is not None + and qualifier_data[0] == '"' + and qualifier_data[-1] == '"' + ): + # Remove quotes + qualifier_data = qualifier_data[1:-1] + # Append the data to the annotation qualifier... + if qualifier_name == "translation": + assert record.seq is None, "Multiple translations!" + record.seq = Seq(qualifier_data.replace("\n", "")) + elif qualifier_name == "db_xref": + # it's a list, possibly empty.
It's safe to extend + record.dbxrefs.append(qualifier_data) + else: + if qualifier_data is not None: + qualifier_data = qualifier_data.replace( + "\n", " " + ).replace(" ", " ") + try: + annotations[qualifier_name] += " " + qualifier_data + except KeyError: + # Not an addition to existing data, it's the first bit + annotations[qualifier_name] = qualifier_data + + # Fill in the ID, Name, Description + # ================================= + try: + record.id = annotations[tags2id[0]] + except KeyError: + pass + try: + record.name = annotations[tags2id[1]] + except KeyError: + pass + try: + record.description = annotations[tags2id[2]] + except KeyError: + pass + + yield record + + +class EmblScanner(InsdcScanner): + """For extracting chunks of information in EMBL files.""" + + RECORD_START = "ID " + HEADER_WIDTH = 5 + FEATURE_START_MARKERS = ["FH Key Location/Qualifiers", "FH"] + FEATURE_END_MARKERS = ["XX"] # XX can also mark the end of many things! + FEATURE_QUALIFIER_INDENT = 21 + FEATURE_QUALIFIER_SPACER = "FT" + " " * (FEATURE_QUALIFIER_INDENT - 2) + SEQUENCE_HEADERS = ["SQ", "CO"] # Remove trailing spaces + + EMBL_INDENT = HEADER_WIDTH + EMBL_SPACER = " " * EMBL_INDENT + + def parse_footer(self): + """Return a tuple containing a list of any misc strings, and the sequence.""" + if self.line[: self.HEADER_WIDTH].rstrip() not in self.SEQUENCE_HEADERS: + raise ValueError("Footer format unexpected: '%s'" % self.line) + + # Note that the SQ line can be split into several lines... + misc_lines = [] + while self.line[: self.HEADER_WIDTH].rstrip() in self.SEQUENCE_HEADERS: + misc_lines.append(self.line) + self.line = self.handle.readline() + if not self.line: + raise ValueError("Premature end of file") + self.line = self.line.rstrip() + + if not ( + self.line[: self.HEADER_WIDTH] == " " * self.HEADER_WIDTH + or self.line.strip() == "//" + ): + raise ValueError("Unexpected content after SQ or CO line: %r" % self.line) + + seq_lines = [] + line = self.line + while True: + if not line: + raise ValueError("Premature end of file in sequence data") + line = line.strip() + if not line: + raise ValueError("Blank line in sequence data") + if line == "//": + break + if self.line[: self.HEADER_WIDTH] != (" " * self.HEADER_WIDTH): + raise ValueError( + "Problem with characters in header line, " + " or incorrect header width: " + self.line + ) + # Remove trailing number now, remove spaces later + linersplit = line.rsplit(None, 1) + if len(linersplit) == 2 and linersplit[1].isdigit(): + seq_lines.append(linersplit[0]) + elif line.isdigit(): + # Special case of final blank line with no bases + # just the sequence coordinate + pass + else: + warnings.warn( + "EMBL sequence line missing coordinates", BiopythonParserWarning + ) + seq_lines.append(line) + line = self.handle.readline() + self.line = line + return misc_lines, "".join(seq_lines).replace(" ", "") + + def _feed_first_line(self, consumer, line): + assert line[: self.HEADER_WIDTH].rstrip() == "ID" + if line[self.HEADER_WIDTH :].count(";") == 6: + # Looks like the semi colon separated style introduced in 2006 + self._feed_first_line_new(consumer, line) + elif line[self.HEADER_WIDTH :].count(";") == 3: + if line.rstrip().endswith(" SQ"): + # EMBL-bank patent data + self._feed_first_line_patents(consumer, line) + else: + # Looks like the pre 2006 style + self._feed_first_line_old(consumer, line) + elif line[self.HEADER_WIDTH :].count(";") == 2: + # Looks like KIPO patent data + self._feed_first_line_patents_kipo(consumer, line) + else: + raise ValueError("Did
not recognise the ID line layout:\n" + line) + + def _feed_first_line_patents(self, consumer, line): + # Old style EMBL patent records where ID line ended SQ + # Not 100% sure that PRT here is really molecule type and + # not the data file division... + # + # Either Non-Redundant Level 1 database records, + # ID ; ; ; + # e.g. ID NRP_AX000635; PRT; NR1; 15 SQ + # + # Or, Non-Redundant Level 2 database records: + # ID ; ; ; + # e.g. ID NRP0000016E; PRT; NR2; 5 SQ + # e.g. ID NRP_AX000635; PRT; NR1; 15 SQ + fields = [ + data.strip() for data in line[self.HEADER_WIDTH :].strip()[:-3].split(";") + ] + assert len(fields) == 4 + consumer.locus(fields[0]) + consumer.residue_type(fields[1]) # semi-redundant + consumer.data_file_division(fields[2]) + # TODO - Record cluster size? + + def _feed_first_line_patents_kipo(self, consumer, line): + # EMBL format patent sequence from KIPO, e.g. + # ftp://ftp.ebi.ac.uk/pub/databases/patentdata/kipo_prt.dat.gz + # + # e.g. ID DI500001 STANDARD; PRT; 111 AA. + # + # This follows the style of _feed_first_line_old + assert line[: self.HEADER_WIDTH].rstrip() == "ID" + fields = [line[self.HEADER_WIDTH :].split(None, 1)[0]] + fields.extend(line[self.HEADER_WIDTH :].split(None, 1)[1].split(";")) + fields = [entry.strip() for entry in fields] + """ + The tokens represent: + + 0. Primary accession number + (space sep) + 1. ??? (e.g. standard) + (semi-colon) + 2. Molecule type (protein)? Division? Always 'PRT' + 3. Sequence length (e.g. '111 AA.') + """ + consumer.locus(fields[0]) # Should we also call the accession consumer? + # consumer.molecule_type(fields[2]) + self._feed_seq_length(consumer, fields[3]) + + def _feed_first_line_old(self, consumer, line): + # Expects an ID line in the style before 2006, e.g. + # ID SC10H5 standard; DNA; PRO; 4870 BP. + # ID BSUB9999 standard; circular DNA; PRO; 4214630 BP. + assert line[: self.HEADER_WIDTH].rstrip() == "ID" + fields = [line[self.HEADER_WIDTH :].split(None, 1)[0]] + fields.extend(line[self.HEADER_WIDTH :].split(None, 1)[1].split(";")) + fields = [entry.strip() for entry in fields] + """ + The tokens represent: + + 0. Primary accession number + (space sep) + 1. ??? (e.g. standard) + (semi-colon) + 2. Topology and/or Molecule type (e.g. 'circular DNA' or 'DNA') + 3. Taxonomic division (e.g. 'PRO') + 4. Sequence length (e.g. '4639675 BP.') + + """ + consumer.locus(fields[0]) # Should we also call the accession consumer? + consumer.residue_type(fields[2]) + if "circular" in fields[2]: + consumer.topology("circular") + consumer.molecule_type(fields[2].replace("circular", "").strip()) + elif "linear" in fields[2]: + consumer.topology("linear") + consumer.molecule_type(fields[2].replace("linear", "").strip()) + else: + consumer.molecule_type(fields[2].strip()) + consumer.data_file_division(fields[3]) + self._feed_seq_length(consumer, fields[4]) + + def _feed_first_line_new(self, consumer, line): + # Expects an ID line in the style introduced in 2006, e.g. + # ID X56734; SV 1; linear; mRNA; STD; PLN; 1859 BP. + # ID CD789012; SV 4; linear; genomic DNA; HTG; MAM; 500 BP. + assert line[: self.HEADER_WIDTH].rstrip() == "ID" + fields = [data.strip() for data in line[self.HEADER_WIDTH :].strip().split(";")] + assert len(fields) == 7 + """ + The tokens represent: + + 0. Primary accession number + 1. Sequence version number + 2. Topology: 'circular' or 'linear' + 3. Molecule type (e.g. 'genomic DNA') + 4. Data class (e.g. 'STD') + 5. Taxonomic division (e.g. 'PRO') + 6. Sequence length (e.g. 
'4639675 BP.') + + """ + + consumer.locus(fields[0]) + + # Call the accession consumer now, to make sure we record + # something as the record.id, in case there is no AC line + consumer.accession(fields[0]) + + # TODO - How to deal with the version field? At the moment the consumer + # will try and use this for the ID which isn't ideal for EMBL files. + version_parts = fields[1].split() + if ( + len(version_parts) == 2 + and version_parts[0] == "SV" + and version_parts[1].isdigit() + ): + consumer.version_suffix(version_parts[1]) + + # Based on how the old GenBank parser worked, merge these two: + consumer.residue_type(" ".join(fields[2:4])) # Semi-obsolete + + consumer.topology(fields[2]) + consumer.molecule_type(fields[3]) + + # consumer.xxx(fields[4]) # TODO - What should we do with the data class? + + consumer.data_file_division(fields[5]) + + self._feed_seq_length(consumer, fields[6]) + + @staticmethod + def _feed_seq_length(consumer, text): + length_parts = text.split() + assert len(length_parts) == 2, "Invalid sequence length string %r" % text + assert length_parts[1].upper() in ["BP", "BP.", "AA", "AA."] + consumer.size(length_parts[0]) + + def _feed_header_lines(self, consumer, lines): + consumer_dict = { + "AC": "accession", + "SV": "version", # SV line removed in June 2006, now part of ID line + "DE": "definition", + # 'RN' : 'reference_num', + # 'RC' : reference comment... TODO + # 'RP' : 'reference_bases', + # 'RX' : reference cross reference... DOI or Pubmed + "RG": "consrtm", # optional consortium + # 'RA' : 'authors', + # 'RT' : 'title', + "RL": "journal", + "OS": "organism", + "OC": "taxonomy", + # 'DR' : data reference + "CC": "comment", + # 'XX' : splitter + } + # We have to handle the following specially: + # RX (depending on reference type...) + for line in lines: + line_type = line[: self.EMBL_INDENT].strip() + data = line[self.EMBL_INDENT :].strip() + if line_type == "XX": + pass + elif line_type == "RN": + # Reformat reference numbers for the GenBank based consumer + # e.g. '[1]' becomes '1' + if data[0] == "[" and data[-1] == "]": + data = data[1:-1] + consumer.reference_num(data) + elif line_type == "RP": + if data.strip() == "[-]": + # Patent EMBL files from KIPO just use: RN [-] + pass + else: + # Reformat reference numbers for the GenBank based consumer + # e.g. '1-4639675' becomes '(bases 1 to 4639675)' + # and '160-550, 904-1055' becomes '(bases 160 to 550; 904 to 1055)' + # Note could be multi-line, and end with a comma + parts = [ + bases.replace("-", " to ").strip() + for bases in data.split(",") + if bases.strip() + ] + consumer.reference_bases("(bases %s)" % "; ".join(parts)) + elif line_type == "RT": + # Remove the enclosing quotes and trailing semi colon. + # Note the title can be split over multiple lines. + if data.startswith('"'): + data = data[1:] + if data.endswith('";'): + data = data[:-2] + consumer.title(data) + elif line_type == "RX": + # EMBL support three reference types at the moment: + # - PUBMED PUBMED bibliographic database (NLM) + # - DOI Digital Object Identifier (International DOI Foundation) + # - AGRICOLA US National Agriculture Library (NAL) of the US Department + # of Agriculture (USDA) + # + # Format: + # RX resource_identifier; identifier. + # + # e.g. + # RX DOI; 10.1016/0024-3205(83)90010-3. + # RX PUBMED; 264242. + # + # Currently our reference object only supports PUBMED and MEDLINE + # (as these were in GenBank files?). 
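+ # e.g. for 'RX PUBMED; 264242.' the data string here is + # 'PUBMED; 264242.', so key becomes 'PUBMED' and value becomes + # '264242' after the split and strip below.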
+ key, value = data.split(";", 1) + if value.endswith("."): + value = value[:-1] + value = value.strip() + if key == "PUBMED": + consumer.pubmed_id(value) + # TODO - Handle other reference types (here and in BioSQL bindings) + elif line_type == "CC": + # Have to pass a list of strings for this one (not just a string) + consumer.comment([data]) + elif line_type == "DR": + # Database Cross-reference, format: + # DR database_identifier; primary_identifier; secondary_identifier. + # + # e.g. + # DR MGI; 98599; Tcrb-V4. + # + # TODO - How should we store any secondary identifier? + parts = data.rstrip(".").split(";") + # Turn it into "database_identifier:primary_identifier" to + # mimic the GenBank parser. e.g. "MGI:98599" + if len(parts) == 1: + warnings.warn( + "Malformed DR line in EMBL file.", BiopythonParserWarning + ) + else: + consumer.dblink("%s:%s" % (parts[0].strip(), parts[1].strip())) + elif line_type == "RA": + # Remove trailing ; at end of authors list + consumer.authors(data.rstrip(";")) + elif line_type == "PR": + # In the EMBL patent files, this is a PR (PRiority) line which + # provides the earliest active priority within the family. + # The priority number comes first, followed by the priority date. + # + # e.g. + # PR JP19990377484 16-DEC-1999 + # + # However, in most EMBL files this is a PR (PRoject) line which + # gives the BioProject reference number. + # + # e.g. + # PR Project:PRJNA60715; + # + # In GenBank files this corresponds to the old PROJECT line + # which was later replaced with the DBLINK line. + if data.startswith("Project:"): + # Remove trailing ; at end of the project reference + consumer.project(data.rstrip(";")) + elif line_type == "KW": + consumer.keywords(data.rstrip(";")) + elif line_type in consumer_dict: + # It's a semi-automatic entry! + getattr(consumer, consumer_dict[line_type])(data) + else: + if self.debug: + print("Ignoring EMBL header line:\n%s" % line) + + def _feed_misc_lines(self, consumer, lines): + # TODO - Should we do something with the information on the SQ line(s)? + lines.append("") + line_iter = iter(lines) + try: + for line in line_iter: + if line.startswith("CO "): + line = line[5:].strip() + contig_location = line + while True: + line = next(line_iter) + if not line: + break + elif line.startswith("CO "): + # Don't need to preserve the whitespace here. + contig_location += line[5:].strip() + else: + raise ValueError( + "Expected CO (contig) continuation line, got:\n" + line + ) + consumer.contig_location(contig_location) + if line.startswith("SQ Sequence "): + # e.g. + # SQ Sequence 219 BP; 82 A; 48 C; 33 G; 45 T; 11 other; + # + # Or, EMBL-bank patent, e.g. + # SQ Sequence 465 AA; 3963407aa91d3a0d622fec679a4524e0; MD5; + self._feed_seq_length( + consumer, line[14:].rstrip().rstrip(";").split(";", 1)[0] + ) + # TODO - Record the checksum etc? + return + except StopIteration: + raise ValueError("Problem in misc lines before sequence") from None + + +class _ImgtScanner(EmblScanner): + """For extracting chunks of information in IMGT (EMBL like) files (PRIVATE). + + IMGT files are like EMBL files but in order to allow longer feature types + the features should be indented by 25 characters not 21 characters. In + practice the IMGT flat files tend to use either 21 or 25 characters, so we + must cope with both. + + This is private to encourage use of Bio.SeqIO rather than Bio.GenBank.
+ """ + + FEATURE_START_MARKERS = [ + "FH Key Location/Qualifiers", + "FH Key Location/Qualifiers (from EMBL)", + "FH Key Location/Qualifiers", + "FH", + ] + + def _feed_first_line(self, consumer, line): + assert line[: self.HEADER_WIDTH].rstrip() == "ID" + if line[self.HEADER_WIDTH :].count(";") != 5: + # Assume its an older EMBL-like line, + return EmblScanner._feed_first_line(self, consumer, line) + # Otherwise assume its the new (circa 2016) IMGT style + # as used in the IPD-IMGT/HLA Database + # + # https://github.com/ANHIG/IMGTHLA/ + # + # The key changes post 3.16 are the addition of an SV value + # to the ID line, these additions should make the format more + # similar to the ENA style. + # + # ID HLA00001 standard; DNA; HUM; 3503 BP. + # + # becomes + # + # ID HLA00001; SV 1; standard; DNA; HUM; 3503 BP. + fields = [data.strip() for data in line[self.HEADER_WIDTH :].strip().split(";")] + assert len(fields) == 6 + """ + The tokens represent: + + 0. Primary accession number (eg 'HLA00001') + 1. Sequence version number (eg 'SV 1') + 2. ??? eg 'standard' + 3. Molecule type (e.g. 'DNA') + 4. Taxonomic division (e.g. 'HUM') + 5. Sequence length (e.g. '3503 BP.') + """ + consumer.locus(fields[0]) + + # See TODO on the EMBL _feed_first_line_new about version field + version_parts = fields[1].split() + if ( + len(version_parts) == 2 + and version_parts[0] == "SV" + and version_parts[1].isdigit() + ): + consumer.version_suffix(version_parts[1]) + + consumer.residue_type(fields[3]) + if "circular" in fields[3]: + consumer.topology("circular") + consumer.molecule_type(fields[3].replace("circular", "").strip()) + elif "linear" in fields[3]: + consumer.topology("linear") + consumer.molecule_type(fields[3].replace("linear", "").strip()) + else: + consumer.molecule_type(fields[3].strip()) + consumer.data_file_division(fields[4]) + self._feed_seq_length(consumer, fields[5]) + + def parse_features(self, skip=False): + """Return list of tuples for the features (if present). + + Each feature is returned as a tuple (key, location, qualifiers) + where key and location are strings (e.g. "CDS" and + "complement(join(490883..490885,1..879))") while qualifiers + is a list of two string tuples (feature qualifier keys and values). + + Assumes you have already read to the start of the features table. + """ + if self.line.rstrip() not in self.FEATURE_START_MARKERS: + if self.debug: + print("Didn't find any feature table") + return [] + + while self.line.rstrip() in self.FEATURE_START_MARKERS: + self.line = self.handle.readline() + + bad_position_re = re.compile(r"([0-9]+)>") + + features = [] + line = self.line + while True: + if not line: + raise ValueError("Premature end of line during features table") + if line[: self.HEADER_WIDTH].rstrip() in self.SEQUENCE_HEADERS: + if self.debug: + print("Found start of sequence") + break + line = line.rstrip() + if line == "//": + raise ValueError("Premature end of features table, marker '//' found") + if line in self.FEATURE_END_MARKERS: + if self.debug: + print("Found end of features") + line = self.handle.readline() + break + if line[2 : self.FEATURE_QUALIFIER_INDENT].strip() == "": + # This is an empty feature line between qualifiers. Empty + # feature lines within qualifiers are handled below (ignored). 
+ line = self.handle.readline() + continue + + if skip: + line = self.handle.readline() + while ( + line[: self.FEATURE_QUALIFIER_INDENT] + == self.FEATURE_QUALIFIER_SPACER + ): + line = self.handle.readline() + else: + assert line[:2] == "FT" + try: + feature_key, location_start = line[2:].strip().split() + except ValueError: + # e.g. "FT TRANSMEMBRANE-REGION2163..2240\n" + # Assume indent of 25 as per IMGT spec, with the location + # start in column 26 (one-based). + feature_key = line[2:25].strip() + location_start = line[25:].strip() + feature_lines = [location_start] + line = self.handle.readline() + while ( + line[: self.FEATURE_QUALIFIER_INDENT] + == self.FEATURE_QUALIFIER_SPACER + or line.rstrip() == "" + ): # cope with blank lines in the midst of a feature + # Use strip to remove any harmless trailing white space AND any leading + # white space (copes with 21 or 26 indents and other variants) + assert line[:2] == "FT" + feature_lines.append(line[self.FEATURE_QUALIFIER_INDENT :].strip()) + line = self.handle.readline() + feature_key, location, qualifiers = self.parse_feature( + feature_key, feature_lines + ) + # Try to handle known problems with IMGT locations here: + if ">" in location: + # Nasty hack for common IMGT bug, should be >123 not 123> + # in a location string. At least here the meaning is clear, + # and since it is so common I don't want to issue a warning + # warnings.warn("Feature location %s is invalid, " + # "moving greater than sign before position" + # % location, BiopythonParserWarning) + location = bad_position_re.sub(r">\1", location) + features.append((feature_key, location, qualifiers)) + self.line = line + return features + + +class GenBankScanner(InsdcScanner): + """For extracting chunks of information in GenBank files.""" + + RECORD_START = "LOCUS " + HEADER_WIDTH = 12 + FEATURE_START_MARKERS = ["FEATURES Location/Qualifiers", "FEATURES"] + FEATURE_END_MARKERS = [] + FEATURE_QUALIFIER_INDENT = 21 + FEATURE_QUALIFIER_SPACER = " " * FEATURE_QUALIFIER_INDENT + SEQUENCE_HEADERS = [ + "CONTIG", + "ORIGIN", + "BASE COUNT", + "WGS", + "TSA", + "TLS", + ] # trailing spaces removed + + GENBANK_INDENT = HEADER_WIDTH + GENBANK_SPACER = " " * GENBANK_INDENT + + STRUCTURED_COMMENT_START = "-START##" + STRUCTURED_COMMENT_END = "-END##" + STRUCTURED_COMMENT_DELIM = " :: " + + def parse_footer(self):
 """Return a tuple containing a list of any misc strings, and the sequence.""" + if self.line[: self.HEADER_WIDTH].rstrip() not in self.SEQUENCE_HEADERS: + raise ValueError("Footer format unexpected: '%s'" % self.line) + + misc_lines = [] + while ( + self.line[: self.HEADER_WIDTH].rstrip() in self.SEQUENCE_HEADERS + or self.line[: self.HEADER_WIDTH] == " " * self.HEADER_WIDTH + or "WGS" == self.line[:3] + ): + misc_lines.append(self.line.rstrip()) + self.line = self.handle.readline() + if not self.line: + raise ValueError("Premature end of file") + + if self.line[: self.HEADER_WIDTH].rstrip() in self.SEQUENCE_HEADERS: + raise ValueError("Eh?
'%s'" % self.line) + + # Now just consume the sequence lines until reach the // marker + # or a CONTIG line + seq_lines = [] + line = self.line + while True: + if not line: + warnings.warn( + "Premature end of file in sequence data", BiopythonParserWarning + ) + line = "//" + break + line = line.rstrip() + if not line: + warnings.warn("Blank line in sequence data", BiopythonParserWarning) + line = self.handle.readline() + continue + if line == "//": + break + if line.startswith("CONTIG"): + break + if len(line) > 9 and line[9:10] != " ": + # Some broken programs indent the sequence by one space too many + # so try to get rid of that and test again. + warnings.warn( + "Invalid indentation for sequence line", BiopythonParserWarning + ) + line = line[1:] + if len(line) > 9 and line[9:10] != " ": + raise ValueError("Sequence line mal-formed, '%s'" % line) + seq_lines.append(line[10:]) # remove spaces later + line = self.handle.readline() + + self.line = line + return misc_lines, "".join(seq_lines).replace(" ", "") + + def _feed_first_line(self, consumer, line): + """Scan over and parse GenBank LOCUS line (PRIVATE). + + This must cope with several variants, primarily the old and new column + based standards from GenBank. Additionally EnsEMBL produces GenBank + files where the LOCUS line is space separated rather that following + the column based layout. + + We also try to cope with GenBank like files with partial LOCUS lines. + + As of release 229.0, the columns are no longer strictly in a given + position. See GenBank format release notes: + + "Historically, the LOCUS line has had a fixed length and its + elements have been presented at specific column positions... + But with the anticipated increases in the lengths of accession + numbers, and the advent of sequences that are gigabases long, + maintaining the column positions will not always be possible and + the overall length of the LOCUS line could exceed 79 characters." + + """ + ##################################### + # LOCUS line # + ##################################### + if line[0 : self.GENBANK_INDENT] != "LOCUS ": + raise ValueError("LOCUS line does not start correctly:\n" + line) + + # Have to break up the locus line, and handle the different bits of it. + # There are at least two different versions of the locus line... + if line[29:33] in [" bp ", " aa ", " rc "] and line[55:62] == " ": + # Old... note we insist on the 55:62 being empty to avoid trying + # to parse space separated LOCUS lines from Ensembl etc, see below. + # + # Positions Contents + # --------- -------- + # 00:06 LOCUS + # 06:12 spaces + # 12:?? Locus name + # ??:?? space + # ??:29 Length of sequence, right-justified + # 29:33 space, bp, space + # 33:41 strand type / molecule type, e.g. DNA + # 41:42 space + # 42:51 Blank (implies linear), linear or circular + # 51:52 space + # 52:55 The division code (e.g. 
BCT, VRL, INV) + # 55:62 space + # 62:73 Date, in the form dd-MMM-yyyy (e.g., 15-MAR-1991) + # + # assert line[29:33] in [' bp ', ' aa ',' rc '] , \ + # 'LOCUS line does not contain size units at expected position:\n' + line + if line[41:42] != " ": + raise ValueError( + "LOCUS line does not contain space at position 42:\n" + line + ) + if line[42:51].strip() not in ["", "linear", "circular"]: + raise ValueError( + "LOCUS line does not contain valid entry " + "(linear, circular, ...):\n" + line + ) + if line[51:52] != " ": + raise ValueError( + "LOCUS line does not contain space at position 52:\n" + line + ) + # if line[55:62] != ' ': + # raise ValueError('LOCUS line does not contain spaces from position 56 to 62:\n' + line) + if line[62:73].strip(): + if line[64:65] != "-": + raise ValueError( + "LOCUS line does not contain - at " + "position 65 in date:\n" + line + ) + if line[68:69] != "-": + raise ValueError( + "LOCUS line does not contain - at " + "position 69 in date:\n" + line + ) + + name_and_length_str = line[self.GENBANK_INDENT : 29] + while " " in name_and_length_str: + name_and_length_str = name_and_length_str.replace(" ", " ") + name_and_length = name_and_length_str.split(" ") + if len(name_and_length) > 2: + raise ValueError( + "Cannot parse the name and length in the LOCUS line:\n" + line + ) + if len(name_and_length) == 1: + raise ValueError("Name and length collide in the LOCUS line:\n" + line) + # Should be possible to split them based on position, if + # a clear definition of the standard exists THAT AGREES with + # existing files. + name, length = name_and_length + if len(name) > 16: + # As long as the sequence is short, can steal its leading spaces + # to extend the name over the current 16 character limit. + # However, that deserves a warning as it is out of spec. + warnings.warn( + "GenBank LOCUS line identifier over 16 characters", + BiopythonParserWarning, + ) + consumer.locus(name) + consumer.size(length) + # consumer.residue_type(line[33:41].strip()) + + if line[33:51].strip() == "" and line[29:33] == " aa ": + # Amino acids -> protein (even if there is no residue type given) + consumer.residue_type("PROTEIN") + else: + consumer.residue_type(line[33:51].strip()) + + consumer.molecule_type(line[33:41].strip()) + consumer.topology(line[42:51].strip()) + consumer.data_file_division(line[52:55]) + if line[62:73].strip(): + consumer.date(line[62:73]) + elif line[40:44] in [" bp ", " aa ", " rc "] and line[54:64].strip() in [ + "", + "linear", + "circular", + ]: + # New... linear/circular/big blank test should avoid EnsEMBL style + # LOCUS line being treated like a proper column based LOCUS line. + # + # Positions Contents + # --------- -------- + # 00:06 LOCUS + # 06:12 spaces + # 12:?? Locus name + # ??:?? space + # ??:40 Length of sequence, right-justified + # 40:44 space, bp, space + # 44:47 Blank, ss-, ds-, ms- + # 47:54 Blank, DNA, RNA, tRNA, mRNA, uRNA, snRNA, cDNA + # 54:55 space + # 55:63 Blank (implies linear), linear or circular + # 63:64 space + # 64:67 The division code (e.g. BCT, VRL, INV) + # 67:68 space + # 68:79 Date, in the form dd-MMM-yyyy (e.g., 15-MAR-1991) + # + if len(line) < 79: + # JBEI genbank files seem to miss a division code and date + # See issue #1656 e.g. 
+ # LOCUS pEH010 5743 bp DNA circular + warnings.warn( + "Truncated LOCUS line found - is this correct?\n:%r" % line, + BiopythonParserWarning, + ) + padding_len = 79 - len(line) + padding = " " * padding_len + line += padding + + if line[40:44] not in [" bp ", " aa ", " rc "]: + raise ValueError( + "LOCUS line does not contain size units at " + "expected position:\n" + line + ) + if line[44:47] not in [" ", "ss-", "ds-", "ms-"]: + raise ValueError( + "LOCUS line does not have valid strand " + "type (Single stranded, ...):\n" + line + ) + + if not ( + line[47:54].strip() == "" + or "DNA" in line[47:54].strip().upper() + or "RNA" in line[47:54].strip().upper() + ): + raise ValueError( + "LOCUS line does not contain valid " + "sequence type (DNA, RNA, ...):\n" + line + ) + if line[54:55] != " ": + raise ValueError( + "LOCUS line does not contain space at position 55:\n" + line + ) + if line[55:63].strip() not in ["", "linear", "circular"]: + raise ValueError( + "LOCUS line does not contain valid " + "entry (linear, circular, ...):\n" + line + ) + if line[63:64] != " ": + raise ValueError( + "LOCUS line does not contain space at position 64:\n" + line + ) + if line[67:68] != " ": + raise ValueError( + "LOCUS line does not contain space at position 68:\n" + line + ) + if line[68:79].strip(): + if line[70:71] != "-": + raise ValueError( + "LOCUS line does not contain - at " + "position 71 in date:\n" + line + ) + if line[74:75] != "-": + raise ValueError( + "LOCUS line does not contain - at " + "position 75 in date:\n" + line + ) + + name_and_length_str = line[self.GENBANK_INDENT : 40] + while " " in name_and_length_str: + name_and_length_str = name_and_length_str.replace(" ", " ") + name_and_length = name_and_length_str.split(" ") + if len(name_and_length) > 2: + raise ValueError( + "Cannot parse the name and length in the LOCUS line:\n" + line + ) + if len(name_and_length) == 1: + raise ValueError("Name and length collide in the LOCUS line:\n" + line) + # Should be possible to split them based on position, if + # a clear definition of the stand exists THAT AGREES with + # existing files. + consumer.locus(name_and_length[0]) + consumer.size(name_and_length[1]) + + if line[44:54].strip() == "" and line[40:44] == " aa ": + # Amino acids -> protein (even if there is no residue type given) + consumer.residue_type(("PROTEIN " + line[54:63]).strip()) + else: + consumer.residue_type(line[44:63].strip()) + + consumer.molecule_type(line[44:54].strip()) + consumer.topology(line[55:63].strip()) + if line[64:76].strip(): + consumer.data_file_division(line[64:67]) + if line[68:79].strip(): + consumer.date(line[68:79]) + elif line[self.GENBANK_INDENT :].strip().count(" ") == 0: + # Truncated LOCUS line, as produced by some EMBOSS tools - see bug 1762 + # + # e.g. + # + # "LOCUS U00096" + # + # rather than: + # + # "LOCUS U00096 4639675 bp DNA circular BCT" + # + # Positions Contents + # --------- -------- + # 00:06 LOCUS + # 06:12 spaces + # 12:?? Locus name + if line[self.GENBANK_INDENT :].strip() != "": + consumer.locus(line[self.GENBANK_INDENT :].strip()) + else: + # Must just have just "LOCUS ", is this even legitimate? + # We should be able to continue parsing... we need real world testcases! 
+                warnings.warn(
+                    "Minimal LOCUS line found - is this correct?\n:%r" % line,
+                    BiopythonParserWarning,
+                )
+        elif (
+            len(line.split()) == 8
+            and line.split()[3] in ("aa", "bp")
+            and line.split()[5] in ("linear", "circular")
+        ):
+            # Cope with invalidly spaced GenBank LOCUS lines like
+            # LOCUS AB070938 6497 bp DNA linear BCT 11-OCT-2001
+            # This will also cope with extra long accession numbers and
+            # sequence lengths
+            splitline = line.split()
+            consumer.locus(splitline[1])
+            # Provide a descriptive error message if the sequence is too long
+            # for python to handle
+
+            if int(splitline[2]) > sys.maxsize:
+                raise ValueError(
+                    "Tried to load a sequence with a length %s, "
+                    "your installation of python can only load "
+                    "sequences of length %s" % (splitline[2], sys.maxsize)
+                )
+            else:
+                consumer.size(splitline[2])
+
+            consumer.residue_type(splitline[4])
+            consumer.topology(splitline[5])
+            consumer.data_file_division(splitline[6])
+            consumer.date(splitline[7])
+            if len(line) < 80:
+                warnings.warn(
+                    "Attempting to parse malformed locus line:\n%r\n"
+                    "Found locus %r size %r residue_type %r\n"
+                    "Some fields may be wrong."
+                    % (line, splitline[1], splitline[2], splitline[4]),
+                    BiopythonParserWarning,
+                )
+        elif len(line.split()) == 7 and line.split()[3] in ["aa", "bp"]:
+            # Cope with EnsEMBL genbank files which use space separation rather
+            # than the expected column based layout. e.g.
+            # LOCUS HG531_PATCH 1000000 bp DNA HTG 18-JUN-2011
+            # LOCUS HG531_PATCH 759984 bp DNA HTG 18-JUN-2011
+            # LOCUS HG506_HG1000_1_PATCH 814959 bp DNA HTG 18-JUN-2011
+            # LOCUS HG506_HG1000_1_PATCH 1219964 bp DNA HTG 18-JUN-2011
+            # Notice that the 'bp' can occur in the position expected by either
+            # the old or the new fixed column standards (parsed above).
+            splitline = line.split()
+            consumer.locus(splitline[1])
+            consumer.size(splitline[2])
+            consumer.residue_type(splitline[4])
+            consumer.data_file_division(splitline[5])
+            consumer.date(splitline[6])
+        elif len(line.split()) >= 4 and line.split()[3] in ["aa", "bp"]:
+            # Cope with EMBOSS seqret output where it seems the locus id can cause
+            # the other fields to overflow. We just IGNORE the other fields!
+            warnings.warn(
+                "Malformed LOCUS line found - is this correct?\n:%r" % line,
+                BiopythonParserWarning,
+            )
+            consumer.locus(line.split()[1])
+            consumer.size(line.split()[2])
+        elif len(line.split()) >= 4 and line.split()[-1] in ["aa", "bp"]:
+            # Cope with pseudo-GenBank files like this:
+            # "LOCUS RNA5 complete 1718 bp"
+            # Treat everything between LOCUS and the size as the identifier.
+            warnings.warn(
+                "Malformed LOCUS line found - is this correct?\n:%r" % line,
+                BiopythonParserWarning,
+            )
+            consumer.locus(line[5:].rsplit(None, 2)[0].strip())
+            consumer.size(line.split()[-2])
+        else:
+            raise ValueError("Did not recognise the LOCUS line layout:\n" + line)
+
+    def _feed_header_lines(self, consumer, lines):
+        # The following dictionary maps GenBank lines to the associated
+        # consumer methods - special cases like LOCUS, where one GenBank
+        # line triggers several consumer calls, have to be
+        # handled individually.
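+        # As an illustration (hypothetical record text), a header line such as
+        #     DEFINITION Saccharomyces cerevisiae TCP1-beta gene, partial cds.
+        # is split at column GENBANK_INDENT into the key "DEFINITION" and the
+        # data "Saccharomyces cerevisiae TCP1-beta gene, partial cds.", then
+        # dispatched through consumer_dict below as consumer.definition(data).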
+ consumer_dict = { + "DEFINITION": "definition", + "ACCESSION": "accession", + "NID": "nid", + "PID": "pid", + "DBSOURCE": "db_source", + "KEYWORDS": "keywords", + "SEGMENT": "segment", + "SOURCE": "source", + "AUTHORS": "authors", + "CONSRTM": "consrtm", + "PROJECT": "project", + "TITLE": "title", + "JOURNAL": "journal", + "MEDLINE": "medline_id", + "PUBMED": "pubmed_id", + "REMARK": "remark", + } + # We have to handle the following specially: + # ORIGIN (locus, size, residue_type, data_file_division and date) + # COMMENT (comment) + # VERSION (version and gi) + # DBLINK (database links like projects, newlines important) + # REFERENCE (eference_num and reference_bases) + # ORGANISM (organism and taxonomy) + lines = [_f for _f in lines if _f] + lines.append("") # helps avoid getting StopIteration all the time + line_iter = iter(lines) + try: + line = next(line_iter) + while True: + if not line: + break + line_type = line[: self.GENBANK_INDENT].strip() + data = line[self.GENBANK_INDENT :].strip() + + if line_type == "VERSION": + # Need to call consumer.version(), and maybe also consumer.gi() as well. + # e.g. + # VERSION AC007323.5 GI:6587720 + while " " in data: + data = data.replace(" ", " ") + if " GI:" not in data: + consumer.version(data) + else: + if self.debug: + print( + "Version [" + + data.split(" GI:")[0] + + "], gi [" + + data.split(" GI:")[1] + + "]" + ) + consumer.version(data.split(" GI:")[0]) + consumer.gi(data.split(" GI:")[1]) + # Read in the next line! + line = next(line_iter) + elif line_type == "DBLINK": + # Need to call consumer.dblink() for each line, e.g. + # DBLINK Project: 57779 + # BioProject: PRJNA57779 + consumer.dblink(data.strip()) + # Read in the next line, and see if its more of the DBLINK section: + while True: + line = next(line_iter) + if line[: self.GENBANK_INDENT] == self.GENBANK_SPACER: + # Add this continuation to the data string + consumer.dblink(line[self.GENBANK_INDENT :].strip()) + else: + # End of the DBLINK, leave this text in the variable "line" + break + elif line_type == "REFERENCE": + if self.debug > 1: + print("Found reference [" + data + "]") + # Need to call consumer.reference_num() and consumer.reference_bases() + # e.g. + # REFERENCE 1 (bases 1 to 86436) + # + # Note that this can be multiline, see Bug 1968, e.g. + # + # REFERENCE 42 (bases 1517 to 1696; 3932 to 4112; 17880 to 17975; 21142 to + # 28259) + # + # For such cases we will call the consumer once only. + data = data.strip() + + # Read in the next line, and see if its more of the reference: + while True: + line = next(line_iter) + if line[: self.GENBANK_INDENT] == self.GENBANK_SPACER: + # Add this continuation to the data string + data += " " + line[self.GENBANK_INDENT :] + if self.debug > 1: + print("Extended reference text [" + data + "]") + else: + # End of the reference, leave this text in the variable "line" + break + + # We now have all the reference line(s) stored in a string, data, + # which we pass to the consumer + while " " in data: + data = data.replace(" ", " ") + if " " not in data: + if self.debug > 2: + print('Reference number "' + data + '"') + consumer.reference_num(data) + else: + if self.debug > 2: + print( + 'Reference number "' + + data[: data.find(" ")] + + '", "' + + data[data.find(" ") + 1 :] + + '"' + ) + consumer.reference_num(data[: data.find(" ")]) + consumer.reference_bases(data[data.find(" ") + 1 :]) + elif line_type == "ORGANISM": + # Typically the first line is the organism, and subsequent lines + # are the taxonomy lineage. 
However, given longer and longer + # species names (as more and more strains and sub strains get + # sequenced) the oragnism name can now get wrapped onto multiple + # lines. The NCBI say we have to recognise the lineage line by + # the presence of semi-colon delimited entries. In the long term, + # they are considering adding a new keyword (e.g. LINEAGE). + # See Bug 2591 for details. + organism_data = data + lineage_data = "" + while True: + line = next(line_iter) + if line[0 : self.GENBANK_INDENT] == self.GENBANK_SPACER: + if lineage_data or ";" in line: + lineage_data += " " + line[self.GENBANK_INDENT :] + elif line[self.GENBANK_INDENT :].strip() == ".": + # No lineage data, just . place holder + pass + else: + organism_data += ( + " " + line[self.GENBANK_INDENT :].strip() + ) + else: + # End of organism and taxonomy + break + consumer.organism(organism_data) + if lineage_data.strip() == "" and self.debug > 1: + print("Taxonomy line(s) missing or blank") + consumer.taxonomy(lineage_data.strip()) + del organism_data, lineage_data + elif line_type == "COMMENT": + # A COMMENT can either be plain text or tabular (Structured Comment), + # or contain both. Multi-line comments are common. The code calls + # consumer.comment() once with a list where each entry + # is a line. If there's a structured comment consumer.structured_comment() + # is called with a dict of dicts where the secondary key/value pairs are + # the same as those in the structured comment table. The primary key is + # the title or header of the table (e.g. Assembly-Data, FluData). See + # http://www.ncbi.nlm.nih.gov/genbank/structuredcomment + # for more information on Structured Comments. + data = line[self.GENBANK_INDENT :] + if self.debug > 1: + print("Found comment") + comment_list = [] + structured_comment_dict = OrderedDict() + regex = fr"([^#]+){self.STRUCTURED_COMMENT_START}$" + structured_comment_key = re.search(regex, data) + if structured_comment_key is not None: + structured_comment_key = structured_comment_key.group(1) + if self.debug > 1: + print("Found Structured Comment") + else: + comment_list.append(data) + + while True: + line = next(line_iter) + data = line[self.GENBANK_INDENT :] + if line[0 : self.GENBANK_INDENT] == self.GENBANK_SPACER: + if self.STRUCTURED_COMMENT_START in data: + regex = r"([^#]+){}$".format( + self.STRUCTURED_COMMENT_START + ) + structured_comment_key = re.search(regex, data) + if structured_comment_key is not None: + structured_comment_key = structured_comment_key.group( + 1 + ) + else: + comment_list.append(data) + elif ( + structured_comment_key is not None + and self.STRUCTURED_COMMENT_DELIM in data + ): + match = re.search( + r"(.+?)\s*{}\s*(.+)".format( + self.STRUCTURED_COMMENT_DELIM + ), + data, + ) + structured_comment_dict.setdefault( + structured_comment_key, OrderedDict() + ) + structured_comment_dict[structured_comment_key][ + match.group(1) + ] = match.group(2) + if self.debug > 2: + print( + "Structured Comment continuation [" + data + "]" + ) + elif ( + structured_comment_key is not None + and self.STRUCTURED_COMMENT_END not in data + ): + # Don't die on a malformed comment, just warn and carry on + if ( + structured_comment_key + not in structured_comment_dict + ): + warnings.warn( + "Structured comment not parsed for %s. Is it malformed?" 
+                                        % consumer.data.name,
+                                        BiopythonParserWarning,
+                                    )
+                                    continue
+
+                                # The current structured comment has a multiline value
+                                previous_value_line = structured_comment_dict[
+                                    structured_comment_key
+                                ][match.group(1)]
+                                structured_comment_dict[structured_comment_key][
+                                    match.group(1)
+                                ] = (previous_value_line + " " + line.strip())
+                            elif self.STRUCTURED_COMMENT_END in data:
+                                # End of structured comment
+                                structured_comment_key = None
+                            else:
+                                comment_list.append(data)
+                                if self.debug > 2:
+                                    print("Comment continuation [" + data + "]")
+                        else:
+                            # End of the comment
+                            break
+                    if comment_list:
+                        consumer.comment(comment_list)
+                    if structured_comment_dict:
+                        consumer.structured_comment(structured_comment_dict)
+                    del comment_list, structured_comment_key, structured_comment_dict
+                elif line_type in consumer_dict:
+                    # It's a semi-automatic entry!
+                    # Now, this may be a multi line entry...
+                    while True:
+                        line = next(line_iter)
+                        if line[0 : self.GENBANK_INDENT] == self.GENBANK_SPACER:
+                            data += " " + line[self.GENBANK_INDENT :]
+                        else:
+                            # We now have all the data for this entry:
+
+                            # The DEFINITION field must end with a period
+                            # see ftp://ftp.ncbi.nih.gov/genbank/gbrel.txt [3.4.5]
+                            # and discussion https://github.com/biopython/biopython/pull/616
+                            # We consider this period to belong to the syntax,
+                            # not to the data, so remove it if it exists
+                            if line_type == "DEFINITION" and data.endswith("."):
+                                data = data[:-1]
+                            getattr(consumer, consumer_dict[line_type])(data)
+                            # End of continuation - return to top of loop!
+                            break
+                else:
+                    if self.debug:
+                        print("Ignoring GenBank header line:\n%r" % line)
+                    # Read in next line
+                    line = next(line_iter)
+        except StopIteration:
+            raise ValueError("Problem in header") from None
+
+    def _feed_misc_lines(self, consumer, lines):
+        # Deals with a few misc lines between the features and the sequence
+        lines.append("")
+        line_iter = iter(lines)
+        try:
+            for line in line_iter:
+                if line.startswith("BASE COUNT"):
+                    line = line[10:].strip()
+                    if line:
+                        if self.debug:
+                            print("base_count = " + line)
+                        consumer.base_count(line)
+                if line.startswith("ORIGIN"):
+                    line = line[6:].strip()
+                    if line:
+                        if self.debug:
+                            print("origin_name = " + line)
+                        consumer.origin_name(line)
+                if line.startswith("TLS "):
+                    line = line[3:].strip()
+                    consumer.tls(line)
+                if line.startswith("TSA "):
+                    line = line[3:].strip()
+                    consumer.tsa(line)
+                if line.startswith("WGS "):
+                    line = line[3:].strip()
+                    consumer.wgs(line)
+                if line.startswith("WGS_SCAFLD"):
+                    line = line[10:].strip()
+                    consumer.add_wgs_scafld(line)
+                if line.startswith("CONTIG"):
+                    line = line[6:].strip()
+                    contig_location = line
+                    while True:
+                        line = next(line_iter)
+                        if not line:
+                            break
+                        elif line[: self.GENBANK_INDENT] == self.GENBANK_SPACER:
+                            # Don't need to preserve the whitespace here.
+                            contig_location += line[self.GENBANK_INDENT :].rstrip()
+                        elif line.startswith("ORIGIN"):
+                            # Strange, seen this in GenPept files via Entrez gbwithparts
+                            line = line[6:].strip()
+                            if line:
+                                consumer.origin_name(line)
+                            break
+                        else:
+                            raise ValueError(
+                                "Expected CONTIG continuation line, got:\n" + line
+                            )
+                    consumer.contig_location(contig_location)
+            return
+        except StopIteration:
+            raise ValueError("Problem in misc lines before sequence") from None
diff --git a/code/lib/Bio/GenBank/__init__.py b/code/lib/Bio/GenBank/__init__.py
new file mode 100644
index 0000000..1875116
--- /dev/null
+++ b/code/lib/Bio/GenBank/__init__.py
@@ -0,0 +1,1746 @@
+# Copyright 2000 by Jeffrey Chang, Brad Chapman. All rights reserved.
+# Copyright 2006-2017 by Peter Cock. All rights reserved. +# +# This code is part of the Biopython distribution and governed by its +# license. Please see the LICENSE file that should have been included +# as part of this package. + +"""Code to work with GenBank formatted files. + +Rather than using Bio.GenBank, you are now encouraged to use Bio.SeqIO with +the "genbank" or "embl" format names to parse GenBank or EMBL files into +SeqRecord and SeqFeature objects (see the Biopython tutorial for details). + +Using Bio.GenBank directly to parse GenBank files is only useful if you want +to obtain GenBank-specific Record objects, which is a much closer +representation to the raw file contents than the SeqRecord alternative from +the FeatureParser (used in Bio.SeqIO). + +To use the Bio.GenBank parser, there are two helper functions: + + - read Parse a handle containing a single GenBank record + as Bio.GenBank specific Record objects. + - parse Iterate over a handle containing multiple GenBank + records as Bio.GenBank specific Record objects. + +The following internal classes are not intended for direct use and may +be deprecated in a future release. + +Classes: + - Iterator Iterate through a file of GenBank entries + - ErrorFeatureParser Catch errors caused during parsing. + - FeatureParser Parse GenBank data in SeqRecord and SeqFeature objects. + - RecordParser Parse GenBank data into a Record object. + +Exceptions: + - ParserFailureError Exception indicating a failure in the parser (ie. + scanner or consumer) + - LocationParserError Exception indicating a problem with the spark based + location parser. + +""" + +import re +import warnings + +from Bio import BiopythonParserWarning +from Bio.Seq import Seq +from Bio import SeqFeature + +# other Bio.GenBank stuff +from .utils import FeatureValueCleaner +from .Scanner import GenBankScanner + + +# Constants used to parse GenBank header lines +GENBANK_INDENT = 12 +GENBANK_SPACER = " " * GENBANK_INDENT + +# Constants for parsing GenBank feature lines +FEATURE_KEY_INDENT = 5 +FEATURE_QUALIFIER_INDENT = 21 +FEATURE_KEY_SPACER = " " * FEATURE_KEY_INDENT +FEATURE_QUALIFIER_SPACER = " " * FEATURE_QUALIFIER_INDENT + +# Regular expressions for location parsing +_solo_location = r"[<>]?\d+" +_pair_location = r"[<>]?\d+\.\.[<>]?\d+" +_between_location = r"\d+\^\d+" + +_within_position = r"\(\d+\.\d+\)" +_re_within_position = re.compile(_within_position) +_within_location = r"([<>]?\d+|%s)\.\.([<>]?\d+|%s)" % ( + _within_position, + _within_position, +) +assert _re_within_position.match("(3.9)") +assert re.compile(_within_location).match("(3.9)..10") +assert re.compile(_within_location).match("26..(30.33)") +assert re.compile(_within_location).match("(13.19)..(20.28)") + +_oneof_position = r"one\-of\(\d+(,\d+)+\)" +_re_oneof_position = re.compile(_oneof_position) +_oneof_location = r"([<>]?\d+|%s)\.\.([<>]?\d+|%s)" % (_oneof_position, _oneof_position) +assert _re_oneof_position.match("one-of(6,9)") +assert re.compile(_oneof_location).match("one-of(6,9)..101") +assert re.compile(_oneof_location).match("one-of(6,9)..one-of(101,104)") +assert re.compile(_oneof_location).match("6..one-of(101,104)") + +assert not _re_oneof_position.match("one-of(3)") +assert _re_oneof_position.match("one-of(3,6)") +assert _re_oneof_position.match("one-of(3,6,9)") + + +_simple_location = r"\d+\.\.\d+" +_re_simple_location = re.compile(r"^%s$" % _simple_location) +_re_simple_compound = re.compile( + r"^(join|order|bond)\(%s(,%s)*\)$" % (_simple_location, _simple_location) +) 
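+# For example, _re_simple_location matches a plain span such as "104..160",
+# while _re_simple_compound matches "join(104..160,320..390,504..579)";
+# fuzzy spans like "<104..>160" are left to the complex patterns below
+# (the assertions further down exercise exactly these cases).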
+_complex_location = r"([a-zA-Z][a-zA-Z0-9_\.\|]*[a-zA-Z0-9]?\:)?(%s|%s|%s|%s|%s)" % ( + _pair_location, + _solo_location, + _between_location, + _within_location, + _oneof_location, +) +_re_complex_location = re.compile(r"^%s$" % _complex_location) +_possibly_complemented_complex_location = r"(%s|complement\(%s\))" % ( + _complex_location, + _complex_location, +) +_re_complex_compound = re.compile( + r"^(join|order|bond)\(%s(,%s)*\)$" + % (_possibly_complemented_complex_location, _possibly_complemented_complex_location) +) + + +assert _re_simple_location.match("104..160") +assert not _re_simple_location.match("68451760..68452073^68452074") +assert not _re_simple_location.match("<104..>160") +assert not _re_simple_location.match("104") +assert not _re_simple_location.match("<1") +assert not _re_simple_location.match(">99999") +assert not _re_simple_location.match("join(104..160,320..390,504..579)") +assert not _re_simple_compound.match("bond(12,63)") +assert _re_simple_compound.match("join(104..160,320..390,504..579)") +assert _re_simple_compound.match("order(1..69,1308..1465)") +assert not _re_simple_compound.match("order(1..69,1308..1465,1524)") +assert not _re_simple_compound.match("join(<1..442,992..1228,1524..>1983)") +assert not _re_simple_compound.match("join(<1..181,254..336,422..497,574..>590)") +assert not _re_simple_compound.match( + "join(1475..1577,2841..2986,3074..3193,3314..3481,4126..>4215)" +) +assert not _re_simple_compound.match("test(1..69,1308..1465)") +assert not _re_simple_compound.match("complement(1..69)") +assert not _re_simple_compound.match("(1..69)") +assert _re_complex_location.match("(3.9)..10") +assert _re_complex_location.match("26..(30.33)") +assert _re_complex_location.match("(13.19)..(20.28)") +assert _re_complex_location.match("41^42") # between +assert _re_complex_location.match("AL121804:41^42") +assert _re_complex_location.match("AL121804:41..610") +assert _re_complex_location.match("AL121804.2:41..610") +assert _re_complex_location.match( + "AL358792.24.1.166931:3274..3461" +) # lots of dots in external reference +assert _re_complex_location.match("one-of(3,6)..101") +assert _re_complex_compound.match( + "join(153490..154269,AL121804.2:41..610,AL121804.2:672..1487)" +) +assert not _re_simple_compound.match( + "join(153490..154269,AL121804.2:41..610,AL121804.2:672..1487)" +) +assert _re_complex_compound.match("join(complement(69611..69724),139856..140650)") +assert _re_complex_compound.match( + "join(complement(AL354868.10.1.164018:80837..81016),complement(AL354868.10.1.164018:80539..80835))" +) + +# Trans-spliced example from NC_016406, note underscore in reference name: +assert _re_complex_location.match("NC_016402.1:6618..6676") +assert _re_complex_location.match("181647..181905") +assert _re_complex_compound.match( + "join(complement(149815..150200),complement(293787..295573),NC_016402.1:6618..6676,181647..181905)" +) +assert not _re_complex_location.match( + "join(complement(149815..150200),complement(293787..295573),NC_016402.1:6618..6676,181647..181905)" +) +assert not _re_simple_compound.match( + "join(complement(149815..150200),complement(293787..295573),NC_016402.1:6618..6676,181647..181905)" +) +assert not _re_complex_location.match( + "join(complement(149815..150200),complement(293787..295573),NC_016402.1:6618..6676,181647..181905)" +) +assert not _re_simple_location.match( + "join(complement(149815..150200),complement(293787..295573),NC_016402.1:6618..6676,181647..181905)" +) + +_solo_bond = re.compile(r"bond\(%s\)" % _solo_location) 
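+# A hypothetical protein feature location such as "join(bond(284),bond(305))"
+# is matched piecewise by _solo_bond; the feature consumer later strips each
+# "bond(...)" wrapper (with a BiopythonParserWarning), leaving "join(284,305)".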
+assert _solo_bond.match("bond(196)") +assert _solo_bond.search("bond(196)") +assert _solo_bond.search("join(bond(284),bond(305),bond(309),bond(305))") + + +def _pos(pos_str, offset=0): + """Build a Position object (PRIVATE). + + For an end position, leave offset as zero (default): + + >>> _pos("5") + ExactPosition(5) + + For a start position, set offset to minus one (for Python counting): + + >>> _pos("5", -1) + ExactPosition(4) + + This also covers fuzzy positions: + + >>> p = _pos("<5") + >>> p + BeforePosition(5) + >>> print(p) + <5 + >>> int(p) + 5 + + >>> _pos(">5") + AfterPosition(5) + + By default assumes an end position, so note the integer behaviour: + + >>> p = _pos("one-of(5,8,11)") + >>> p + OneOfPosition(11, choices=[ExactPosition(5), ExactPosition(8), ExactPosition(11)]) + >>> print(p) + one-of(5,8,11) + >>> int(p) + 11 + + >>> _pos("(8.10)") + WithinPosition(10, left=8, right=10) + + Fuzzy start positions: + + >>> p = _pos("<5", -1) + >>> p + BeforePosition(4) + >>> print(p) + <4 + >>> int(p) + 4 + + Notice how the integer behaviour changes too! + + >>> p = _pos("one-of(5,8,11)", -1) + >>> p + OneOfPosition(4, choices=[ExactPosition(4), ExactPosition(7), ExactPosition(10)]) + >>> print(p) + one-of(4,7,10) + >>> int(p) + 4 + + """ + if pos_str.startswith("<"): + return SeqFeature.BeforePosition(int(pos_str[1:]) + offset) + elif pos_str.startswith(">"): + return SeqFeature.AfterPosition(int(pos_str[1:]) + offset) + elif _re_within_position.match(pos_str): + s, e = pos_str[1:-1].split(".") + s = int(s) + offset + e = int(e) + offset + if offset == -1: + default = s + else: + default = e + return SeqFeature.WithinPosition(default, left=s, right=e) + elif _re_oneof_position.match(pos_str): + assert pos_str.startswith("one-of(") + assert pos_str[-1] == ")" + parts = [ + SeqFeature.ExactPosition(int(pos) + offset) + for pos in pos_str[7:-1].split(",") + ] + if offset == -1: + default = min(int(pos) for pos in parts) + else: + default = max(int(pos) for pos in parts) + return SeqFeature.OneOfPosition(default, choices=parts) + else: + return SeqFeature.ExactPosition(int(pos_str) + offset) + + +def _loc(loc_str, expected_seq_length, strand, seq_type=None): + """Make FeatureLocation from non-compound non-complement location (PRIVATE). + + This is also invoked to 'automatically' fix ambiguous formatting of features + that span the origin of a circular sequence. + + Simple examples, + + >>> _loc("123..456", 1000, +1) + FeatureLocation(ExactPosition(122), ExactPosition(456), strand=1) + >>> _loc("<123..>456", 1000, strand = -1) + FeatureLocation(BeforePosition(122), AfterPosition(456), strand=-1) + + A more complex location using within positions, + + >>> _loc("(9.10)..(20.25)", 1000, 1) + FeatureLocation(WithinPosition(8, left=8, right=9), WithinPosition(25, left=20, right=25), strand=1) + + Notice how that will act as though it has overall start 8 and end 25. + + Zero length between feature, + + >>> _loc("123^124", 1000, 0) + FeatureLocation(ExactPosition(123), ExactPosition(123), strand=0) + + The expected sequence length is needed for a special case, a between + position at the start/end of a circular genome: + + >>> _loc("1000^1", 1000, 1) + FeatureLocation(ExactPosition(1000), ExactPosition(1000), strand=1) + + Apart from this special case, between positions P^Q must have P+1==Q, + + >>> _loc("123^456", 1000, 1) + Traceback (most recent call last): + ... 
+ ValueError: Invalid between location '123^456' + + You can optionally provide a reference name: + + >>> _loc("AL391218.9:105173..108462", 2000000, 1) + FeatureLocation(ExactPosition(105172), ExactPosition(108462), strand=1, ref='AL391218.9') + + >>> _loc("<2644..159", 2868, 1, "circular") + CompoundLocation([FeatureLocation(BeforePosition(2643), ExactPosition(2868), strand=1), FeatureLocation(ExactPosition(0), ExactPosition(159), strand=1)], 'join') + """ + if ":" in loc_str: + ref, loc_str = loc_str.split(":") + else: + ref = None + try: + s, e = loc_str.split("..") + except ValueError: + assert ".." not in loc_str + if "^" in loc_str: + # A between location like "67^68" (one based counting) is a + # special case (note it has zero length). In python slice + # notation this is 67:67, a zero length slice. See Bug 2622 + # Further more, on a circular genome of length N you can have + # a location N^1 meaning the junction at the origin. See Bug 3098. + # NOTE - We can imagine between locations like "2^4", but this + # is just "3". Similarly, "2^5" is just "3..4" + s, e = loc_str.split("^") + if int(s) + 1 == int(e): + pos = _pos(s) + elif int(s) == expected_seq_length and e == "1": + pos = _pos(s) + else: + raise ValueError("Invalid between location %r" % loc_str) from None + return SeqFeature.FeatureLocation(pos, pos, strand, ref=ref) + else: + # e.g. "123" + s = loc_str + e = loc_str + + # Attempt to fix features that span the origin + s_pos = _pos(s, -1) + e_pos = _pos(e) + if int(s_pos) > int(e_pos): + if seq_type is None or "circular" not in seq_type.lower(): + warnings.warn( + "It appears that %r is a feature that spans " + "the origin, but the sequence topology is " + "undefined. Skipping feature." % loc_str, + BiopythonParserWarning, + ) + return None + warnings.warn( + "Attempting to fix invalid location %r as " + "it looks like incorrect origin wrapping. " + "Please fix input file, this could have " + "unintended behavior." % loc_str, + BiopythonParserWarning, + ) + + f1 = SeqFeature.FeatureLocation(s_pos, expected_seq_length, strand) + f2 = SeqFeature.FeatureLocation(0, int(e_pos), strand) + + if strand == -1: + # For complementary features spanning the origin + return f2 + f1 + else: + return f1 + f2 + + return SeqFeature.FeatureLocation(_pos(s, -1), _pos(e), strand, ref=ref) + + +def _split_compound_loc(compound_loc): + """Split a tricky compound location string (PRIVATE). 
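+
+    Only top-level commas separate the parts; a comma inside a nested
+    one-of(...) group stays with its own part, as the examples below show.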
+ + >>> list(_split_compound_loc("123..145")) + ['123..145'] + >>> list(_split_compound_loc("123..145,200..209")) + ['123..145', '200..209'] + >>> list(_split_compound_loc("one-of(200,203)..300")) + ['one-of(200,203)..300'] + >>> list(_split_compound_loc("complement(123..145),200..209")) + ['complement(123..145)', '200..209'] + >>> list(_split_compound_loc("123..145,one-of(200,203)..209")) + ['123..145', 'one-of(200,203)..209'] + >>> list(_split_compound_loc("123..145,one-of(200,203)..one-of(209,211),300")) + ['123..145', 'one-of(200,203)..one-of(209,211)', '300'] + >>> list(_split_compound_loc("123..145,complement(one-of(200,203)..one-of(209,211)),300")) + ['123..145', 'complement(one-of(200,203)..one-of(209,211))', '300'] + >>> list(_split_compound_loc("123..145,200..one-of(209,211),300")) + ['123..145', '200..one-of(209,211)', '300'] + >>> list(_split_compound_loc("123..145,200..one-of(209,211)")) + ['123..145', '200..one-of(209,211)'] + >>> list(_split_compound_loc("complement(149815..150200),complement(293787..295573),NC_016402.1:6618..6676,181647..181905")) + ['complement(149815..150200)', 'complement(293787..295573)', 'NC_016402.1:6618..6676', '181647..181905'] + """ + if "one-of(" in compound_loc: + # Hard case + while "," in compound_loc: + assert compound_loc[0] != "," + assert compound_loc[0:2] != ".." + i = compound_loc.find(",") + part = compound_loc[:i] + compound_loc = compound_loc[i:] # includes the comma + while part.count("(") > part.count(")"): + assert "one-of(" in part, (part, compound_loc) + i = compound_loc.find(")") + part += compound_loc[: i + 1] + compound_loc = compound_loc[i + 1 :] + if compound_loc.startswith(".."): + i = compound_loc.find(",") + if i == -1: + part += compound_loc + compound_loc = "" + else: + part += compound_loc[:i] + compound_loc = compound_loc[i:] # includes the comma + while part.count("(") > part.count(")"): + assert part.count("one-of(") == 2 + i = compound_loc.find(")") + part += compound_loc[: i + 1] + compound_loc = compound_loc[i + 1 :] + if compound_loc.startswith(","): + compound_loc = compound_loc[1:] + assert part + yield part + if compound_loc: + yield compound_loc + else: + # Easy case + yield from compound_loc.split(",") + + +class Iterator: + """Iterator interface to move over a file of GenBank entries one at a time (OBSOLETE). + + This class is likely to be deprecated in a future release of Biopython. + Please use Bio.SeqIO.parse(..., format="gb") or Bio.GenBank.parse(...) + for SeqRecord and GenBank specific Record objects respectively instead. + """ + + def __init__(self, handle, parser=None): + """Initialize the iterator. + + Arguments: + - handle - A handle with GenBank entries to iterate through. + - parser - An optional parser to pass the entries through before + returning them. If None, then the raw entry will be returned. + + """ + self.handle = handle + self._parser = parser + + def __next__(self): + """Return the next GenBank record from the handle. + + Will return None if we ran out of records. + """ + if self._parser is None: + lines = [] + while True: + line = self.handle.readline() + if not line: + return None # Premature end of file? 
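+                # Accumulate the raw record text: a GenBank entry ends with a
+                # line containing just "//", so keep reading until that is seen.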
+                lines.append(line)
+                if line.rstrip() == "//":
+                    break
+            return "".join(lines)
+        try:
+            return self._parser.parse(self.handle)
+        except StopIteration:
+            return None
+
+    def __iter__(self):
+        """Iterate over the records."""
+        return iter(self.__next__, None)
+
+
+class ParserFailureError(Exception):
+    """Failure caused by some kind of problem in the parser."""
+
+    pass
+
+
+class LocationParserError(Exception):
+    """Could not properly parse out a location from a GenBank file."""
+
+    pass
+
+
+_cleaner = FeatureValueCleaner()
+
+
+class FeatureParser:
+    """Parse GenBank files into Seq + Feature objects (OBSOLETE).
+
+    Direct use of this class is discouraged, and may be deprecated in
+    a future release of Biopython.
+
+    Please use Bio.SeqIO.parse(...) or Bio.SeqIO.read(...) instead.
+    """
+
+    def __init__(self, debug_level=0, use_fuzziness=1, feature_cleaner=None):
+        """Initialize a GenBank parser and Feature consumer.
+
+        Arguments:
+         - debug_level - An optional argument that specifies the amount of
+           debugging information the parser should spit out. By default we have
+           no debugging info (the fastest way to do things), but if you want
+           you can set this as high as two and see exactly where a parse fails.
+         - use_fuzziness - Specify whether or not to use fuzzy representations.
+           The default is 1 (use fuzziness).
+         - feature_cleaner - A class which will be used to clean out the
+           values of features. This class must implement the function
+           clean_value. GenBank.utils has a "standard" cleaner class, which
+           is used by default.
+
+        """
+        self._scanner = GenBankScanner(debug_level)
+        self.use_fuzziness = use_fuzziness
+        if feature_cleaner:
+            self._cleaner = feature_cleaner
+        else:
+            self._cleaner = _cleaner  # default
+
+    def parse(self, handle):
+        """Parse the specified handle."""
+        _consumer = _FeatureConsumer(self.use_fuzziness, self._cleaner)
+        self._scanner.feed(handle, _consumer)
+        return _consumer.data
+
+
+class RecordParser:
+    """Parse GenBank files into Record objects (OBSOLETE).
+
+    Direct use of this class is discouraged, and may be deprecated in
+    a future release of Biopython.
+
+    Please use the Bio.GenBank.parse(...) or Bio.GenBank.read(...) functions
+    instead.
+    """
+
+    def __init__(self, debug_level=0):
+        """Initialize the parser.
+
+        Arguments:
+         - debug_level - An optional argument that specifies the amount of
+           debugging information the parser should spit out. By default we have
+           no debugging info (the fastest way to do things), but if you want
+           you can set this as high as two and see exactly where a parse fails.
+
+        """
+        self._scanner = GenBankScanner(debug_level)
+
+    def parse(self, handle):
+        """Parse the specified handle into a GenBank record."""
+        _consumer = _RecordConsumer()
+
+        self._scanner.feed(handle, _consumer)
+        return _consumer.data
+
+
+class _BaseGenBankConsumer:
+    """Abstract GenBank consumer providing useful general functions (PRIVATE).
+
+    This just helps to eliminate some duplication in things that most
+    GenBank consumers want to do.
+    """
+
+    # Special keys in GenBank records that we should remove spaces from.
+    # For instance, /translation keys have values which are proteins and
+    # should have spaces and newlines removed from them. This class
+    # attribute gives us more control over specific formatting problems.
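+    # As a hypothetical example, a /translation value that arrives from the
+    # parser as "MKV LID WGR" would be stored as "MKVLIDWGR" once the spaces
+    # are stripped for the keys listed below.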
+ remove_space_keys = ["translation"] + + def __init__(self): + pass + + @staticmethod + def _split_keywords(keyword_string): + """Split a string of keywords into a nice clean list (PRIVATE).""" + # process the keywords into a python list + if keyword_string == "" or keyword_string == ".": + keywords = "" + elif keyword_string[-1] == ".": + keywords = keyword_string[:-1] + else: + keywords = keyword_string + keyword_list = keywords.split(";") + return [x.strip() for x in keyword_list] + + @staticmethod + def _split_accessions(accession_string): + """Split a string of accession numbers into a list (PRIVATE).""" + # first replace all line feeds with spaces + # Also, EMBL style accessions are split with ';' + accession = accession_string.replace("\n", " ").replace(";", " ") + + return [x.strip() for x in accession.split() if x.strip()] + + @staticmethod + def _split_taxonomy(taxonomy_string): + """Split a string with taxonomy info into a list (PRIVATE).""" + if not taxonomy_string or taxonomy_string == ".": + # Missing data, no taxonomy + return [] + + if taxonomy_string[-1] == ".": + tax_info = taxonomy_string[:-1] + else: + tax_info = taxonomy_string + tax_list = tax_info.split(";") + new_tax_list = [] + for tax_item in tax_list: + new_items = tax_item.split("\n") + new_tax_list.extend(new_items) + while "" in new_tax_list: + new_tax_list.remove("") + return [x.strip() for x in new_tax_list] + + @staticmethod + def _clean_location(location_string): + """Clean whitespace out of a location string (PRIVATE). + + The location parser isn't a fan of whitespace, so we clean it out + before feeding it into the parser. + """ + # Originally this imported string.whitespace and did a replace + # via a loop. It's simpler to just split on whitespace and rejoin + # the string - and this avoids importing string too. See Bug 2684. + return "".join(location_string.split()) + + @staticmethod + def _remove_newlines(text): + """Remove any newlines in the passed text, returning the new string (PRIVATE).""" + # get rid of newlines in the qualifier value + newlines = ["\n", "\r"] + for ws in newlines: + text = text.replace(ws, "") + + return text + + @staticmethod + def _normalize_spaces(text): + """Replace multiple spaces in the passed text with single spaces (PRIVATE).""" + # get rid of excessive spaces + return " ".join(x for x in text.split(" ") if x) + + @staticmethod + def _remove_spaces(text): + """Remove all spaces from the passed text (PRIVATE).""" + return text.replace(" ", "") + + @staticmethod + def _convert_to_python_numbers(start, end): + """Convert a start and end range to python notation (PRIVATE). + + In GenBank, starts and ends are defined in "biological" coordinates, + where 1 is the first base and [i, j] means to include both i and j. + + In python, 0 is the first base and [i, j] means to include i, but + not j. + + So, to convert "biological" to python coordinates, we need to + subtract 1 from the start, and leave the end and things should + be converted happily. + """ + new_start = start - 1 + new_end = end + + return new_start, new_end + + +class _FeatureConsumer(_BaseGenBankConsumer): + """Create a SeqRecord object with Features to return (PRIVATE). + + Attributes: + - use_fuzziness - specify whether or not to parse with fuzziness in + feature locations. + - feature_cleaner - a class that will be used to provide specialized + cleaning-up of feature values. 
+ + """ + + def __init__(self, use_fuzziness, feature_cleaner=None): + from Bio.SeqRecord import SeqRecord + + _BaseGenBankConsumer.__init__(self) + self.data = SeqRecord(None, id=None) + self.data.id = None + self.data.description = "" + + self._use_fuzziness = use_fuzziness + self._feature_cleaner = feature_cleaner + + self._seq_type = "" + self._seq_data = [] + self._cur_reference = None + self._cur_feature = None + self._expected_size = None + + def locus(self, locus_name): + """Set the locus name is set as the name of the Sequence.""" + self.data.name = locus_name + + def size(self, content): + """Record the sequence length.""" + self._expected_size = int(content) + + def residue_type(self, type): + """Record the sequence type (SEMI-OBSOLETE). + + This reflects the fact that the topology (linear/circular) and + molecule type (e.g. DNA vs RNA) were a single field in early + files. Current GenBank/EMBL files have two fields. + """ + self._seq_type = type.strip() + + def topology(self, topology): + """Validate and record sequence topology. + + The topology argument should be "linear" or "circular" (string). + """ + if topology: + if topology not in ["linear", "circular"]: + raise ParserFailureError( + "Unexpected topology %r should be linear or circular" % topology + ) + self.data.annotations["topology"] = topology + + def molecule_type(self, mol_type): + """Validate and record the molecule type (for round-trip etc).""" + if mol_type: + if "circular" in mol_type or "linear" in mol_type: + raise ParserFailureError( + "Molecule type %r should not include topology" % mol_type + ) + + # Writing out records will fail if we have a lower case DNA + # or RNA string in here, so upper case it. + # This is a bit ugly, but we don't want to upper case e.g. + # the m in mRNA, but thanks to the strip we lost the spaces + # so we need to index from the back + if mol_type[-3:].upper() in ("DNA", "RNA") and not mol_type[-3:].isupper(): + warnings.warn( + "Non-upper case molecule type in LOCUS line: %s" % mol_type, + BiopythonParserWarning, + ) + + self.data.annotations["molecule_type"] = mol_type + + def data_file_division(self, division): + self.data.annotations["data_file_division"] = division + + def date(self, submit_date): + self.data.annotations["date"] = submit_date + + def definition(self, definition): + """Set the definition as the description of the sequence.""" + if self.data.description: + # Append to any existing description + # e.g. EMBL files with two DE lines. + self.data.description += " " + definition + else: + self.data.description = definition + + def accession(self, acc_num): + """Set the accession number as the id of the sequence. + + If we have multiple accession numbers, the first one passed is + used. + """ + new_acc_nums = self._split_accessions(acc_num) + + # Also record them ALL in the annotations + try: + # On the off chance there was more than one accession line: + for acc in new_acc_nums: + # Prevent repeat entries + if acc not in self.data.annotations["accessions"]: + self.data.annotations["accessions"].append(acc) + except KeyError: + self.data.annotations["accessions"] = new_acc_nums + + # if we haven't set the id information yet, add the first acc num + if not self.data.id: + if len(new_acc_nums) > 0: + # self.data.id = new_acc_nums[0] + # Use the FIRST accession as the ID, not the first on this line! 
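+                # e.g. a hypothetical record with ACCESSION "U49845 U49846"
+                # gets id "U49845"; later ACCESSION lines never overwrite an
+                # id that has already been set.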
+ self.data.id = self.data.annotations["accessions"][0] + + def tls(self, content): + self.data.annotations["tls"] = content.split("-") + + def tsa(self, content): + self.data.annotations["tsa"] = content.split("-") + + def wgs(self, content): + self.data.annotations["wgs"] = content.split("-") + + def add_wgs_scafld(self, content): + self.data.annotations.setdefault("wgs_scafld", []).append(content.split("-")) + + def nid(self, content): + self.data.annotations["nid"] = content + + def pid(self, content): + self.data.annotations["pid"] = content + + def version(self, version_id): + # Want to use the versioned accession as the record.id + # This comes from the VERSION line in GenBank files, or the + # obsolete SV line in EMBL. For the new EMBL files we need + # both the version suffix from the ID line and the accession + # from the AC line. + if version_id.count(".") == 1 and version_id.split(".")[1].isdigit(): + self.accession(version_id.split(".")[0]) + self.version_suffix(version_id.split(".")[1]) + elif version_id: + # For backwards compatibility... + self.data.id = version_id + + def project(self, content): + """Handle the information from the PROJECT line as a list of projects. + + e.g.:: + + PROJECT GenomeProject:28471 + + or:: + + PROJECT GenomeProject:13543 GenomeProject:99999 + + This is stored as dbxrefs in the SeqRecord to be consistent with the + projected switch of this line to DBLINK in future GenBank versions. + Note the NCBI plan to replace "GenomeProject:28471" with the shorter + "Project:28471" as part of this transition. + """ + content = content.replace("GenomeProject:", "Project:") + self.data.dbxrefs.extend(p for p in content.split() if p) + + def dblink(self, content): + """Store DBLINK cross references as dbxrefs in our record object. + + This line type is expected to replace the PROJECT line in 2009. e.g. + + During transition:: + + PROJECT GenomeProject:28471 + DBLINK Project:28471 + Trace Assembly Archive:123456 + + Once the project line is dropped:: + + DBLINK Project:28471 + Trace Assembly Archive:123456 + + Note GenomeProject -> Project. + + We'll have to see some real examples to be sure, but based on the + above example we can expect one reference per line. + + Note that at some point the NCBI have included an extra space, e.g.:: + + DBLINK Project: 28471 + + """ + # During the transition period with both PROJECT and DBLINK lines, + # we don't want to add the same cross reference twice. + while ": " in content: + content = content.replace(": ", ":") + if content.strip() not in self.data.dbxrefs: + self.data.dbxrefs.append(content.strip()) + + def version_suffix(self, version): + """Set the version to overwrite the id. + + Since the version provides the same information as the accession + number, plus some extra info, we set this as the id if we have + a version. + """ + # e.g. GenBank line: + # VERSION U49845.1 GI:1293613 + # or the obsolete EMBL line: + # SV U49845.1 + # Scanner calls consumer.version("U49845.1") + # which then calls consumer.version_suffix(1) + # + # e.g. EMBL new line: + # ID X56734; SV 1; linear; mRNA; STD; PLN; 1859 BP. 
+ # Scanner calls consumer.version_suffix(1) + assert version.isdigit() + self.data.annotations["sequence_version"] = int(version) + + def db_source(self, content): + self.data.annotations["db_source"] = content.rstrip() + + def gi(self, content): + self.data.annotations["gi"] = content + + def keywords(self, content): + if "keywords" in self.data.annotations: + # Multi-line keywords, append to list + # Note EMBL states "A keyword is never split between lines." + self.data.annotations["keywords"].extend(self._split_keywords(content)) + else: + self.data.annotations["keywords"] = self._split_keywords(content) + + def segment(self, content): + self.data.annotations["segment"] = content + + def source(self, content): + # Note that some software (e.g. VectorNTI) may produce an empty + # source (rather than using a dot/period as might be expected). + if content == "": + source_info = "" + elif content[-1] == ".": + source_info = content[:-1] + else: + source_info = content + self.data.annotations["source"] = source_info + + def organism(self, content): + self.data.annotations["organism"] = content + + def taxonomy(self, content): + """Record (another line of) the taxonomy lineage.""" + lineage = self._split_taxonomy(content) + try: + self.data.annotations["taxonomy"].extend(lineage) + except KeyError: + self.data.annotations["taxonomy"] = lineage + + def reference_num(self, content): + """Signal the beginning of a new reference object.""" + # if we have a current reference that hasn't been added to + # the list of references, add it. + if self._cur_reference is not None: + self.data.annotations["references"].append(self._cur_reference) + else: + self.data.annotations["references"] = [] + + self._cur_reference = SeqFeature.Reference() + + def reference_bases(self, content): + """Attempt to determine the sequence region the reference entails. + + Possible types of information we may have to deal with: + + (bases 1 to 86436) + (sites) + (bases 1 to 105654; 110423 to 111122) + 1 (residues 1 to 182) + """ + # first remove the parentheses + assert content.endswith(")"), content + ref_base_info = content[1:-1] + + all_locations = [] + # parse if we've got 'bases' and 'to' + if "bases" in ref_base_info and "to" in ref_base_info: + # get rid of the beginning 'bases' + ref_base_info = ref_base_info[5:] + locations = self._split_reference_locations(ref_base_info) + all_locations.extend(locations) + elif "residues" in ref_base_info and "to" in ref_base_info: + residues_start = ref_base_info.find("residues") + # get only the information after "residues" + ref_base_info = ref_base_info[(residues_start + len("residues ")) :] + locations = self._split_reference_locations(ref_base_info) + all_locations.extend(locations) + + # make sure if we are not finding information then we have + # the string 'sites' or the string 'bases' + elif ref_base_info == "sites" or ref_base_info.strip() == "bases": + pass + # otherwise raise an error + else: + raise ValueError( + "Could not parse base info %s in record %s" + % (ref_base_info, self.data.id) + ) + + self._cur_reference.location = all_locations + + def _split_reference_locations(self, location_string): + """Get reference locations out of a string of reference information (PRIVATE). + + The passed string should be of the form:: + + 1 to 20; 20 to 100 + + This splits the information out and returns a list of location objects + based on the reference locations. 
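+
+        For example, a hypothetical "1 to 20; 20 to 100" would yield two
+        locations spanning [0:20] and [19:100] in Python counting.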
+ """ + # split possibly multiple locations using the ';' + all_base_info = location_string.split(";") + + new_locations = [] + for base_info in all_base_info: + start, end = base_info.split("to") + new_start, new_end = self._convert_to_python_numbers( + int(start.strip()), int(end.strip()) + ) + this_location = SeqFeature.FeatureLocation(new_start, new_end) + new_locations.append(this_location) + return new_locations + + def authors(self, content): + if self._cur_reference.authors: + self._cur_reference.authors += " " + content + else: + self._cur_reference.authors = content + + def consrtm(self, content): + if self._cur_reference.consrtm: + self._cur_reference.consrtm += " " + content + else: + self._cur_reference.consrtm = content + + def title(self, content): + if self._cur_reference is None: + warnings.warn( + "GenBank TITLE line without REFERENCE line.", BiopythonParserWarning + ) + elif self._cur_reference.title: + self._cur_reference.title += " " + content + else: + self._cur_reference.title = content + + def journal(self, content): + if self._cur_reference.journal: + self._cur_reference.journal += " " + content + else: + self._cur_reference.journal = content + + def medline_id(self, content): + self._cur_reference.medline_id = content + + def pubmed_id(self, content): + self._cur_reference.pubmed_id = content + + def remark(self, content): + """Deal with a reference comment.""" + if self._cur_reference.comment: + self._cur_reference.comment += " " + content + else: + self._cur_reference.comment = content + + def comment(self, content): + try: + self.data.annotations["comment"] += "\n" + "\n".join(content) + except KeyError: + self.data.annotations["comment"] = "\n".join(content) + + def structured_comment(self, content): + self.data.annotations["structured_comment"] = content + + def features_line(self, content): + """Get ready for the feature table when we reach the FEATURE line.""" + self.start_feature_table() + + def start_feature_table(self): + """Indicate we've got to the start of the feature table.""" + # make sure we've added on our last reference object + if self._cur_reference is not None: + self.data.annotations["references"].append(self._cur_reference) + self._cur_reference = None + + def feature_key(self, content): + # start a new feature + self._cur_feature = SeqFeature.SeqFeature() + self._cur_feature.type = content + self.data.features.append(self._cur_feature) + + def location(self, content): + """Parse out location information from the location string. + + This uses simple Python code with some regular expressions to do the + parsing, and then translates the results into appropriate objects. + """ + # clean up newlines and other whitespace inside the location before + # parsing - locations should have no whitespace whatsoever + location_line = self._clean_location(content) + + # Older records have junk like replace(266,"c") in the + # location line. Newer records just replace this with + # the number 266 and have the information in a more reasonable + # place. So we'll just grab out the number and feed this to the + # parser. We shouldn't really be losing any info this way. 
+ if "replace" in location_line: + comma_pos = location_line.find(",") + location_line = location_line[8:comma_pos] + + cur_feature = self._cur_feature + + # Handle top level complement here for speed + if location_line.startswith("complement("): + assert location_line.endswith(")") + location_line = location_line[11:-1] + strand = -1 + elif "PROTEIN" in self._seq_type.upper(): + strand = None + else: + # Assume nucleotide otherwise feature strand for + # GenBank files with bad LOCUS lines set to None + strand = 1 + + # Special case handling of the most common cases for speed + if _re_simple_location.match(location_line): + # e.g. "123..456" + s, e = location_line.split("..") + try: + cur_feature.location = SeqFeature.FeatureLocation( + int(s) - 1, int(e), strand + ) + except ValueError: + # Could be non-integers, more likely bad origin wrapping + cur_feature.location = _loc( + location_line, + self._expected_size, + strand, + seq_type=self._seq_type.lower(), + ) + return + + if ",)" in location_line: + warnings.warn( + "Dropping trailing comma in malformed feature location", + BiopythonParserWarning, + ) + location_line = location_line.replace(",)", ")") + + if _solo_bond.search(location_line): + # e.g. bond(196) + # e.g. join(bond(284),bond(305),bond(309),bond(305)) + warnings.warn( + "Dropping bond qualifier in feature location", BiopythonParserWarning + ) + # There ought to be a better way to do this... + for x in _solo_bond.finditer(location_line): + x = x.group() + location_line = location_line.replace(x, x[5:-1]) + + if _re_simple_compound.match(location_line): + # e.g. join(<123..456,480..>500) + i = location_line.find("(") + # cur_feature.location_operator = location_line[:i] + # we can split on the comma because these are simple locations + locs = [] + for part in location_line[i + 1 : -1].split(","): + s, e = part.split("..") + + try: + locs.append(SeqFeature.FeatureLocation(int(s) - 1, int(e), strand)) + except ValueError: + # Could be non-integers, more likely bad origin wrapping + + # In the case of bad origin wrapping, _loc will return + # a CompoundLocation. CompoundLocation.parts returns a + # list of the FeatureLocation objects inside the + # CompoundLocation. + locs.extend( + _loc( + part, self._expected_size, strand, self._seq_type.lower() + ).parts + ) + + if len(locs) < 2: + # The CompoundLocation will raise a ValueError here! + warnings.warn( + "Should have at least 2 parts for compound location", + BiopythonParserWarning, + ) + cur_feature.location = None + return + if strand == -1: + cur_feature.location = SeqFeature.CompoundLocation( + locs[::-1], operator=location_line[:i] + ) + else: + cur_feature.location = SeqFeature.CompoundLocation( + locs, operator=location_line[:i] + ) + return + + # Handle the general case with more complex regular expressions + if _re_complex_location.match(location_line): + # e.g. "AL121804.2:41..610" + cur_feature.location = _loc( + location_line, + self._expected_size, + strand, + seq_type=self._seq_type.lower(), + ) + return + + if _re_complex_compound.match(location_line): + i = location_line.find("(") + # cur_feature.location_operator = location_line[:i] + # Can't split on the comma because of positions like one-of(1,2,3) + locs = [] + for part in _split_compound_loc(location_line[i + 1 : -1]): + if part.startswith("complement("): + assert part[-1] == ")" + part = part[11:-1] + assert strand != -1, "Double complement?" + part_strand = -1 + else: + part_strand = strand + try: + # There is likely a problem with origin wrapping. 
+ # Using _loc to return a CompoundLocation of the + # wrapped feature and returning the two FeatureLocation + # objects to extend to the list of feature locations. + loc = _loc( + part, + self._expected_size, + part_strand, + seq_type=self._seq_type.lower(), + ).parts + + except ValueError: + print(location_line) + print(part) + raise + # loc will be a list of one or two FeatureLocation items. + locs.extend(loc) + # Historically a join on the reverse strand has been represented + # in Biopython with both the parent SeqFeature and its children + # (the exons for a CDS) all given a strand of -1. Likewise, for + # a join feature on the forward strand they all have strand +1. + # However, we must also consider evil mixed strand examples like + # this, join(complement(69611..69724),139856..140087,140625..140650) + if strand == -1: + # Whole thing was wrapped in complement(...) + for l in locs: + assert l.strand == -1 + # Reverse the backwards order used in GenBank files + # with complement(join(...)) + cur_feature.location = SeqFeature.CompoundLocation( + locs[::-1], operator=location_line[:i] + ) + else: + cur_feature.location = SeqFeature.CompoundLocation( + locs, operator=location_line[:i] + ) + return + # Not recognised + if "order" in location_line and "join" in location_line: + # See Bug 3197 + msg = ( + 'Combinations of "join" and "order" within the same ' + "location (nested operators) are illegal:\n" + location_line + ) + raise LocationParserError(msg) + # This used to be an error.... + cur_feature.location = None + warnings.warn( + BiopythonParserWarning( + "Couldn't parse feature location: %r" % location_line + ) + ) + + def feature_qualifier(self, key, value): + """When we get a qualifier key and its value. + + Can receive None, since you can have valueless keys such as /pseudo + """ + # Hack to try to preserve historical behaviour of /pseudo etc + if value is None: + # if the key doesn't exist yet, add an empty string + if key not in self._cur_feature.qualifiers: + self._cur_feature.qualifiers[key] = [""] + return + # otherwise just skip this key + return + + # Remove enclosing quotation marks + value = re.sub('^"|"$', "", value) + + # Handle NCBI escaping + # Warn if escaping is not according to standard + if re.search(r'[^"]"[^"]|^"[^"]|[^"]"$', value): + warnings.warn( + 'The NCBI states double-quote characters like " should be escaped as "" ' + "(two double - quotes), but here it was not: %r" % value, + BiopythonParserWarning, + ) + # Undo escaping, repeated double quotes -> one double quote + value = value.replace('""', '"') + + if self._feature_cleaner is not None: + value = self._feature_cleaner.clean_value(key, value) + + # if the qualifier name exists, append the value + if key in self._cur_feature.qualifiers: + self._cur_feature.qualifiers[key].append(value) + # otherwise start a new list of the key with its values + else: + self._cur_feature.qualifiers[key] = [value] + + def feature_qualifier_name(self, content_list): + """Use feature_qualifier instead (OBSOLETE).""" + raise NotImplementedError("Use the feature_qualifier method instead.") + + def feature_qualifier_description(self, content): + """Use feature_qualifier instead (OBSOLETE).""" + raise NotImplementedError("Use the feature_qualifier method instead.") + + def contig_location(self, content): + """Deal with CONTIG information.""" + # Historically this was stored as a SeqFeature object, but it was + # stored under record.annotations["contig"] and not under + # record.features with the other SeqFeature objects. 
+ # + # The CONTIG location line can include additional tokens like + # Gap(), Gap(100) or Gap(unk100) which are not used in the feature + # location lines, so storing it using SeqFeature based location + # objects is difficult. + # + # We now store this a string, which means for BioSQL we are now in + # much better agreement with how BioPerl records the CONTIG line + # in the database. + # + # NOTE - This code assumes the scanner will return all the CONTIG + # lines already combined into one long string! + self.data.annotations["contig"] = content + + def origin_name(self, content): + pass + + def base_count(self, content): + pass + + def base_number(self, content): + pass + + def sequence(self, content): + """Add up sequence information as we get it. + + To try and make things speedier, this puts all of the strings + into a list of strings, and then uses string.join later to put + them together. Supposedly, this is a big time savings + """ + assert " " not in content + self._seq_data.append(content.upper()) + + def record_end(self, content): + """Clean up when we've finished the record.""" + # Try and append the version number to the accession for the full id + if not self.data.id: + if "accessions" in self.data.annotations: + raise ValueError( + "Problem adding version number to accession: " + + str(self.data.annotations["accessions"]) + ) + self.data.id = self.data.name # Good fall back? + elif self.data.id.count(".") == 0: + try: + self.data.id += ".%i" % self.data.annotations["sequence_version"] + except KeyError: + pass + + # add the sequence information + + sequence = "".join(self._seq_data) + + if ( + self._expected_size is not None + and len(sequence) != 0 + and self._expected_size != len(sequence) + ): + warnings.warn( + "Expected sequence length %i, found %i (%s)." + % (self._expected_size, len(sequence), self.data.id), + BiopythonParserWarning, + ) + + molecule_type = None + if self._seq_type: + # mRNA is really also DNA, since it is actually cDNA + if "DNA" in self._seq_type.upper() or "MRNA" in self._seq_type.upper(): + molecule_type = "DNA" + # are there ever really RNA sequences in GenBank? + elif "RNA" in self._seq_type.upper(): + # Even for data which was from RNA, the sequence string + # is usually given as DNA (T not U). Bug 3010 + molecule_type = "RNA" + elif ( + "PROTEIN" in self._seq_type.upper() or self._seq_type == "PRT" + ): # PRT is used in EMBL-bank for patents + molecule_type = "protein" + # work around ugly GenBank records which have circular or + # linear but no indication of sequence type + elif self._seq_type in ["circular", "linear", "unspecified"]: + pass + # we have a bug if we get here + else: + raise ValueError( + "Could not determine molecule_type for seq_type %s" % self._seq_type + ) + # Don't overwrite molecule_type + if molecule_type is not None: + self.data.annotations["molecule_type"] = self.data.annotations.get( + "molecule_type", molecule_type + ) + if not sequence and self._expected_size: + self.data.seq = Seq(None, length=self._expected_size) + else: + self.data.seq = Seq(sequence) + + +class _RecordConsumer(_BaseGenBankConsumer): + """Create a GenBank Record object from scanner generated information (PRIVATE).""" + + def __init__(self): + _BaseGenBankConsumer.__init__(self) + from . 
import Record + + self.data = Record.Record() + + self._seq_data = [] + self._cur_reference = None + self._cur_feature = None + self._cur_qualifier = None + + def tls(self, content): + self.data.tls = content.split("-") + + def tsa(self, content): + self.data.tsa = content.split("-") + + def wgs(self, content): + self.data.wgs = content.split("-") + + def add_wgs_scafld(self, content): + self.data.wgs_scafld.append(content.split("-")) + + def locus(self, content): + self.data.locus = content + + def size(self, content): + self.data.size = content + + def residue_type(self, content): + # Be lenient about parsing, but technically lowercase residue types are malformed. + if "dna" in content or "rna" in content: + warnings.warn( + "Invalid seq_type (%s): DNA/RNA should be uppercase." % content, + BiopythonParserWarning, + ) + self.data.residue_type = content + + def data_file_division(self, content): + self.data.data_file_division = content + + def date(self, content): + self.data.date = content + + def definition(self, content): + self.data.definition = content + + def accession(self, content): + for acc in self._split_accessions(content): + if acc not in self.data.accession: + self.data.accession.append(acc) + + def molecule_type(self, mol_type): + """Validate and record the molecule type (for round-trip etc).""" + if mol_type: + if "circular" in mol_type or "linear" in mol_type: + raise ParserFailureError( + "Molecule type %r should not include topology" % mol_type + ) + + # Writing out records will fail if we have a lower case DNA + # or RNA string in here, so upper case it. + # This is a bit ugly, but we don't want to upper case e.g. + # the m in mRNA, but thanks to the strip we lost the spaces + # so we need to index from the back + if mol_type[-3:].upper() in ("DNA", "RNA") and not mol_type[-3:].isupper(): + warnings.warn( + "Non-upper case molecule type in LOCUS line: %s" % mol_type, + BiopythonParserWarning, + ) + + self.data.molecule_type = mol_type + + def topology(self, topology): + """Validate and record sequence topology. + + The topology argument should be "linear" or "circular" (string). + """ + if topology: + if topology not in ["linear", "circular"]: + raise ParserFailureError( + "Unexpected topology %r should be linear or circular" % topology + ) + self.data.topology = topology + + def nid(self, content): + self.data.nid = content + + def pid(self, content): + self.data.pid = content + + def version(self, content): + self.data.version = content + + def db_source(self, content): + self.data.db_source = content.rstrip() + + def gi(self, content): + self.data.gi = content + + def keywords(self, content): + self.data.keywords = self._split_keywords(content) + + def project(self, content): + self.data.projects.extend(p for p in content.split() if p) + + def dblink(self, content): + self.data.dblinks.append(content) + + def segment(self, content): + self.data.segment = content + + def source(self, content): + self.data.source = content + + def organism(self, content): + self.data.organism = content + + def taxonomy(self, content): + self.data.taxonomy = self._split_taxonomy(content) + + def reference_num(self, content): + """Grab the reference number and signal the start of a new reference.""" + # check if we have a reference to add + if self._cur_reference is not None: + self.data.references.append(self._cur_reference) + + from . 
import Record + + self._cur_reference = Record.Reference() + self._cur_reference.number = content + + def reference_bases(self, content): + self._cur_reference.bases = content + + def authors(self, content): + self._cur_reference.authors = content + + def consrtm(self, content): + self._cur_reference.consrtm = content + + def title(self, content): + if self._cur_reference is None: + warnings.warn( + "GenBank TITLE line without REFERENCE line.", BiopythonParserWarning + ) + return + self._cur_reference.title = content + + def journal(self, content): + self._cur_reference.journal = content + + def medline_id(self, content): + self._cur_reference.medline_id = content + + def pubmed_id(self, content): + self._cur_reference.pubmed_id = content + + def remark(self, content): + self._cur_reference.remark = content + + def comment(self, content): + self.data.comment += "\n".join(content) + + def structured_comment(self, content): + self.data.structured_comment = content + + def primary_ref_line(self, content): + """Save reference data for the PRIMARY line.""" + self.data.primary.append(content) + + def primary(self, content): + pass + + def features_line(self, content): + """Get ready for the feature table when we reach the FEATURE line.""" + self.start_feature_table() + + def start_feature_table(self): + """Signal the start of the feature table.""" + # we need to add on the last reference + if self._cur_reference is not None: + self.data.references.append(self._cur_reference) + + def feature_key(self, content): + """Grab the key of the feature and signal the start of a new feature.""" + # first add on feature information if we've got any + self._add_feature() + + from . import Record + + self._cur_feature = Record.Feature() + self._cur_feature.key = content + + def _add_feature(self): + """Add a feature to the record, with relevant checks (PRIVATE). + + This does all of the appropriate checking to make sure we haven't + left any info behind, and that we are only adding info if it + exists. + """ + if self._cur_feature is not None: + # if we have a left over qualifier, add it to the qualifiers + # on the current feature + if self._cur_qualifier is not None: + self._cur_feature.qualifiers.append(self._cur_qualifier) + + self._cur_qualifier = None + self.data.features.append(self._cur_feature) + + def location(self, content): + self._cur_feature.location = self._clean_location(content) + + def feature_qualifier(self, key, value): + self.feature_qualifier_name([key]) + if value is not None: + self.feature_qualifier_description(value) + + def feature_qualifier_name(self, content_list): + """Deal with qualifier names. + + We receive a list of keys, since you can have valueless keys such as + /pseudo which would be passed in with the next key (since no other + tags separate them in the file) + """ + from . 
import Record + + for content in content_list: + # the record parser keeps the /s -- add them if we don't have 'em + if not content.startswith("/"): + content = "/%s" % content + # add on a qualifier if we've got one + if self._cur_qualifier is not None: + self._cur_feature.qualifiers.append(self._cur_qualifier) + + self._cur_qualifier = Record.Qualifier() + self._cur_qualifier.key = content + + def feature_qualifier_description(self, content): + # if we have info then the qualifier key should have a ='s + if "=" not in self._cur_qualifier.key: + self._cur_qualifier.key = "%s=" % self._cur_qualifier.key + cur_content = self._remove_newlines(content) + # remove all spaces from the value if it is a type where spaces + # are not important + for remove_space_key in self.__class__.remove_space_keys: + if remove_space_key in self._cur_qualifier.key: + cur_content = self._remove_spaces(cur_content) + self._cur_qualifier.value = self._normalize_spaces(cur_content) + + def base_count(self, content): + self.data.base_counts = content + + def origin_name(self, content): + self.data.origin = content + + def contig_location(self, content): + """Signal that we have contig information to add to the record.""" + self.data.contig = self._clean_location(content) + + def sequence(self, content): + """Add sequence information to a list of sequence strings. + + This removes spaces in the data and uppercases the sequence, and + then adds it to a list of sequences. Later on we'll join this + list together to make the final sequence. This is faster than + adding on the new string every time. + """ + assert " " not in content + self._seq_data.append(content.upper()) + + def record_end(self, content): + """Signal the end of the record and do any necessary clean-up.""" + # add together all of the sequence parts to create the + # final sequence string + self.data.sequence = "".join(self._seq_data) + # add on the last feature + self._add_feature() + + +def parse(handle): + """Iterate over GenBank formatted entries as Record objects. + + >>> from Bio import GenBank + >>> with open("GenBank/NC_000932.gb") as handle: + ... for record in GenBank.parse(handle): + ... print(record.accession) + ['NC_000932'] + + To get SeqRecord objects use Bio.SeqIO.parse(..., format="gb") + instead. + """ + return iter(Iterator(handle, RecordParser())) + + +def read(handle): + """Read a handle containing a single GenBank entry as a Record object. + + >>> from Bio import GenBank + >>> with open("GenBank/NC_000932.gb") as handle: + ... record = GenBank.read(handle) + ... print(record.accession) + ['NC_000932'] + + To get a SeqRecord object use Bio.SeqIO.read(..., format="gb") + instead. 
+ """ + iterator = parse(handle) + try: + record = next(iterator) + except StopIteration: + raise ValueError("No records found in handle") from None + try: + next(iterator) + raise ValueError("More than one record found in handle") + except StopIteration: + pass + return record + + +if __name__ == "__main__": + from Bio._utils import run_doctest + + run_doctest() diff --git a/code/lib/Bio/GenBank/__pycache__/Record.cpython-37.pyc b/code/lib/Bio/GenBank/__pycache__/Record.cpython-37.pyc new file mode 100644 index 0000000..862e2a0 Binary files /dev/null and b/code/lib/Bio/GenBank/__pycache__/Record.cpython-37.pyc differ diff --git a/code/lib/Bio/GenBank/__pycache__/Scanner.cpython-37.pyc b/code/lib/Bio/GenBank/__pycache__/Scanner.cpython-37.pyc new file mode 100644 index 0000000..24b0a53 Binary files /dev/null and b/code/lib/Bio/GenBank/__pycache__/Scanner.cpython-37.pyc differ diff --git a/code/lib/Bio/GenBank/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/GenBank/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..9d0e9c2 Binary files /dev/null and b/code/lib/Bio/GenBank/__pycache__/__init__.cpython-37.pyc differ diff --git a/code/lib/Bio/GenBank/__pycache__/utils.cpython-37.pyc b/code/lib/Bio/GenBank/__pycache__/utils.cpython-37.pyc new file mode 100644 index 0000000..74c8727 Binary files /dev/null and b/code/lib/Bio/GenBank/__pycache__/utils.cpython-37.pyc differ diff --git a/code/lib/Bio/GenBank/utils.py b/code/lib/Bio/GenBank/utils.py new file mode 100644 index 0000000..6f0eb28 --- /dev/null +++ b/code/lib/Bio/GenBank/utils.py @@ -0,0 +1,68 @@ +# This code is part of the Biopython distribution and governed by its +# license. Please see the LICENSE file that should have been included +# as part of this package. +# + +"""Useful utilities for helping in parsing GenBank files.""" + + +class FeatureValueCleaner: + r"""Provide specialized capabilities for cleaning up values in features. + + This class is designed to provide a mechanism to clean up and process + values in the key/value pairs of GenBank features. This is useful + because in cases like:: + + /translation="MED + YDPWNLRFQSKYKSRDA" + + you'll otherwise end up with white space in it. + + This cleaning needs to be done on a case by case basis since it is + impossible to interpret whether you should be concatenating everything + (as in translations), or combining things with spaces (as might be + the case with /notes). + + >>> cleaner = FeatureValueCleaner(["translation"]) + >>> cleaner + FeatureValueCleaner(['translation']) + >>> cleaner.clean_value("translation", "MED\nYDPWNLRFQSKYKSRDA") + 'MEDYDPWNLRFQSKYKSRDA' + """ + + keys_to_process = ["translation"] + + def __init__(self, to_process=keys_to_process): + """Initialize with the keys we should deal with.""" + self._to_process = to_process + + def __repr__(self): + """Return a string representation of the class.""" + return f"{self.__class__.__name__}({self._to_process!r})" + + def clean_value(self, key_name, value): + """Clean the specified value and return it. + + If the value is not specified to be dealt with, the original value + will be returned. 
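+
+        For example, with the default setup only ``translation`` values are
+        processed; ``clean_value("note", value)`` returns ``value`` unchanged.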
+ """ + if key_name in self._to_process: + try: + cleaner = getattr(self, "_clean_%s" % key_name) + except AttributeError: + raise AssertionError( + "No function to clean key: %s" % key_name + ) from None + value = cleaner(value) + return value + + def _clean_translation(self, value): + """Concatenate a translation value to one long protein string (PRIVATE).""" + translation_parts = value.split() + return "".join(translation_parts) + + +if __name__ == "__main__": + from Bio._utils import run_doctest + + run_doctest() diff --git a/code/lib/Bio/Geo/Record.py b/code/lib/Bio/Geo/Record.py new file mode 100644 index 0000000..5e38c78 --- /dev/null +++ b/code/lib/Bio/Geo/Record.py @@ -0,0 +1,92 @@ +# Copyright 2001 by Katharine Lindner. All rights reserved. +# Copyright 2006 by PeterC. All rights reserved. +# This code is part of the Biopython distribution and governed by its +# license. Please see the LICENSE file that should have been included +# as part of this package. + +"""Hold GEO data in a straightforward format. + +classes: +o Record - All of the information in an GEO record. + +See http://www.ncbi.nlm.nih.gov/geo/ +""" + + +class Record: + """Hold GEO information in a format similar to the original record. + + The Record class is meant to make data easy to get to when you are + just interested in looking at GEO data. + + Attributes: + entity_type + entity_id + entity_attributes + col_defs + table_rows + + """ + + def __init__(self): + """Initialize the class.""" + self.entity_type = "" + self.entity_id = "" + self.entity_attributes = {} + self.col_defs = {} + self.table_rows = [] + + def __str__(self): + """Return the GEO record as a string.""" + output = "" + output += "GEO Type: %s\n" % self.entity_type + output += "GEO Id: %s\n" % self.entity_id + att_keys = sorted(self.entity_attributes) + for key in att_keys: + contents = self.entity_attributes[key] + if isinstance(contents, list): + for item in contents: + try: + output += "%s: %s\n" % (key, item[:40]) + output += out_block(item[40:]) + except Exception: # TODO: IndexError? + pass + elif isinstance(contents, str): + output += "%s: %s\n" % (key, contents[:40]) + output += out_block(contents[40:]) + else: + print(contents) + output += "%s: %s\n" % (key, contents[:40]) + output += out_block(contents[40:]) + col_keys = sorted(self.col_defs) + output += "Column Header Definitions\n" + for key in col_keys: + val = self.col_defs[key] + output += " %s: %s\n" % (key, val[:40]) + output += out_block(val[40:], " ") + # May have to display VERY large tables, + # so only show the first 20 lines of data + MAX_ROWS = 20 + 1 # include header in count + for row in self.table_rows[0:MAX_ROWS]: + output += "%s: " % self.table_rows.index(row) + for col in row: + output += "%s\t" % col + output += "\n" + if len(self.table_rows) > MAX_ROWS: + output += "...\n" + row = self.table_rows[-1] + output += "%s: " % self.table_rows.index(row) + for col in row: + output += "%s\t" % col + output += "\n" + + return output + + +def out_block(text, prefix=""): + """Format text in blocks of 80 chars with an additional optional prefix.""" + output = "" + for j in range(0, len(text), 80): + output += "%s%s\n" % (prefix, text[j : j + 80]) + output += "\n" + return output diff --git a/code/lib/Bio/Geo/__init__.py b/code/lib/Bio/Geo/__init__.py new file mode 100644 index 0000000..6735e9a --- /dev/null +++ b/code/lib/Bio/Geo/__init__.py @@ -0,0 +1,67 @@ +# Copyright 2001 by Katharine Lindner. All rights reserved. +# Copyright 2006 by PeterC. All rights reserved. 
+# Copyright 2007 by Michiel de Hoon. All rights reserved.
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+"""Parser for files from NCBI's Gene Expression Omnibus (GEO).
+
+http://www.ncbi.nlm.nih.gov/geo/
+"""
+
+from . import Record
+
+
+def _read_key_value(line):
+    words = line[1:].split("=", 1)
+    try:
+        key, value = words
+        value = value.strip()
+    except ValueError:
+        key = words[0]
+        value = ""
+    key = key.strip()
+    return key, value
+
+
+def parse(handle):
+    """Read Gene Expression Omnibus records from file handle.
+
+    Returns a generator object which yields Bio.Geo.Record() objects.
+    """
+    record = None
+    for line in handle:
+        line = line.strip("\n").strip("\r")
+        if not line:
+            continue  # Ignore empty lines
+        c = line[0]
+        if c == "^":
+            if record:
+                yield record
+            record = Record.Record()
+            record.entity_type, record.entity_id = _read_key_value(line)
+        elif c == "!":
+            if line in (
+                "!Sample_table_begin",
+                "!Sample_table_end",
+                "!Platform_table_begin",
+                "!Platform_table_end",
+            ):
+                continue
+            key, value = _read_key_value(line)
+            if key in record.entity_attributes:
+                if isinstance(record.entity_attributes[key], list):
+                    record.entity_attributes[key].append(value)
+                else:
+                    existing = record.entity_attributes[key]
+                    record.entity_attributes[key] = [existing, value]
+            else:
+                record.entity_attributes[key] = value
+        elif c == "#":
+            key, value = _read_key_value(line)
+            assert key not in record.col_defs
+            record.col_defs[key] = value
+        else:
+            row = line.split("\t")
+            record.table_rows.append(row)
+    # Don't yield None if the handle contained no records
+    if record:
+        yield record
diff --git a/code/lib/Bio/Geo/__pycache__/Record.cpython-37.pyc b/code/lib/Bio/Geo/__pycache__/Record.cpython-37.pyc
new file mode 100644
index 0000000..8861450
Binary files /dev/null and b/code/lib/Bio/Geo/__pycache__/Record.cpython-37.pyc differ
diff --git a/code/lib/Bio/Geo/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/Geo/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000..1c5efce
Binary files /dev/null and b/code/lib/Bio/Geo/__pycache__/__init__.cpython-37.pyc differ
diff --git a/code/lib/Bio/Graphics/BasicChromosome.py b/code/lib/Bio/Graphics/BasicChromosome.py
new file mode 100644
index 0000000..91e6445
--- /dev/null
+++ b/code/lib/Bio/Graphics/BasicChromosome.py
@@ -0,0 +1,823 @@
+# Copyright 2001, 2003 by Brad Chapman. All rights reserved.
+# Revisions copyright 2011 by Peter Cock. All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Draw representations of organism chromosomes with added information.
+
+These classes are meant to model the drawing of pictures of chromosomes.
+This can be useful for lots of things, including displaying markers on
+a chromosome (i.e. for genetic mapping) and showing synteny between two
+chromosomes.
+
+The structure of these classes is intended to be a Composite, so that
+it will be easy to plug in and switch different parts without
+breaking the general drawing capabilities of the system. The
+relationship between classes is that everything derives from
+_ChromosomeComponent, which specifies the overall interface. The parts
+then are related so that an Organism contains Chromosomes, and these
+Chromosomes contain ChromosomeSegments. This representation differs
+from the canonical composite structure in that we don't really have
+'leaf' nodes here -- all components can potentially hold sub-components.
+
+Most of the time the ChromosomeSegment class is what you'll want to
+customize for specific drawing tasks.
+
+For providing drawing capabilities, these classes use reportlab:
+
+http://www.reportlab.com
+
+This provides nice output in PDF, SVG and postscript. If you have
+reportlab's renderPM module installed you can also use PNG etc.
+"""
+
+# reportlab
+from reportlab.lib.pagesizes import letter
+from reportlab.lib.units import inch
+from reportlab.lib import colors
+from reportlab.pdfbase.pdfmetrics import stringWidth
+
+from reportlab.graphics.shapes import Drawing, String, Line, Rect, Wedge, ArcPath
+from reportlab.graphics.widgetbase import Widget
+
+from Bio.Graphics import _write
+from Bio.Graphics.GenomeDiagram import _Colors
+
+
+_color_trans = _Colors.ColorTranslator()
+
+
+class _ChromosomeComponent(Widget):
+    """Base class specifying the interface for a component of the system.
+
+    This class should not be instantiated directly, but should be used
+    from derived classes.
+    """
+
+    def __init__(self):
+        """Initialize a chromosome component.
+
+        Attributes:
+        - _sub_components -- Any components which are contained under
+          this parent component. This attribute should be accessed through
+          the add() and remove() functions.
+
+        """
+        self._sub_components = []
+
+    def add(self, component):
+        """Add a sub_component to the list of components under this item."""
+        if not isinstance(component, _ChromosomeComponent):
+            raise TypeError(
+                "Expected a _ChromosomeComponent object, got %s" % component
+            )
+
+        self._sub_components.append(component)
+
+    def remove(self, component):
+        """Remove the specified component from the subcomponents.
+
+        Raises a ValueError if the component is not registered as a
+        sub_component.
+        """
+        try:
+            self._sub_components.remove(component)
+        except ValueError:
+            raise ValueError(
+                "Component %s not found in sub_components." % component
+            ) from None
+
+    def draw(self):
+        """Draw the specified component."""
+        raise AssertionError("Subclasses must implement.")
+
+
+class Organism(_ChromosomeComponent):
+    """Top level class for drawing chromosomes.
+
+    This class holds information about an organism and all of its
+    chromosomes, and provides the top level object which could be used
+    for drawing a chromosome representation of an organism.
+
+    Chromosomes should be added and removed from the Organism via the
+    add and remove functions.
+    """
+
+    def __init__(self, output_format="pdf"):
+        """Initialize the class."""
+        _ChromosomeComponent.__init__(self)
+
+        # customizable attributes
+        self.page_size = letter
+        self.title_size = 20
+
+        # Do we need this given we don't draw a legend?
+        # If so, should be a public API...
+        self._legend_height = 0  # 2 * inch
+
+        self.output_format = output_format
+
+    def draw(self, output_file, title):
+        """Draw out the information for the Organism.
+
+        Arguments:
+        - output_file -- The name of a file specifying where the
+          document should be saved, or a handle to be written to.
+          The output format is set when creating the Organism object.
+          Alternatively, output_file=None will return the drawing using
+          the low-level ReportLab objects (for further processing, such
+          as adding additional graphics, before writing).
+        - title -- The output title of the produced document.
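+
+        A minimal usage sketch (output file name hypothetical)::
+
+            organism = Organism()
+            organism.add(Chromosome("I"))
+            organism.draw("organism.pdf", "Example Organism")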
+
+        """
+        width, height = self.page_size
+        cur_drawing = Drawing(width, height)
+
+        self._draw_title(cur_drawing, title, width, height)
+
+        cur_x_pos = inch * 0.5
+        if len(self._sub_components) > 0:
+            x_pos_change = (width - inch) / len(self._sub_components)
+        else:
+            # no sub_components to draw
+            pass
+
+        for sub_component in self._sub_components:
+            # set the drawing location of the chromosome
+            sub_component.start_x_position = cur_x_pos + 0.05 * x_pos_change
+            sub_component.end_x_position = cur_x_pos + 0.95 * x_pos_change
+            sub_component.start_y_position = height - 1.5 * inch
+            sub_component.end_y_position = self._legend_height + 1 * inch
+
+            # do the drawing
+            sub_component.draw(cur_drawing)
+
+            # update the locations for the next chromosome
+            cur_x_pos += x_pos_change
+
+        self._draw_legend(cur_drawing, self._legend_height + 0.5 * inch, width)
+
+        if output_file is None:
+            # Let the user take care of writing to the file...
+            return cur_drawing
+
+        return _write(cur_drawing, output_file, self.output_format)
+
+    def _draw_title(self, cur_drawing, title, width, height):
+        """Write out the title of the organism figure (PRIVATE)."""
+        title_string = String(width / 2, height - inch, title)
+        title_string.fontName = "Helvetica-Bold"
+        title_string.fontSize = self.title_size
+        title_string.textAnchor = "middle"
+
+        cur_drawing.add(title_string)
+
+    def _draw_legend(self, cur_drawing, start_y, width):
+        """Draw a legend for the figure (PRIVATE).
+
+        Subclasses should implement this (see also self._legend_height) to
+        provide specialized legends.
+        """
+        pass
+
+
+class Chromosome(_ChromosomeComponent):
+    """Class for drawing a chromosome of an organism.
+
+    This organizes the drawing of a single organism's chromosome. This
+    class can be instantiated directly, but the draw method makes the
+    most sense to be called in the context of an organism.
+    """
+
+    def __init__(self, chromosome_name):
+        """Initialize a Chromosome for drawing.
+
+        Arguments:
+        - chromosome_name - The label for the chromosome.
+
+        Attributes:
+        - start_x_position, end_x_position - The x positions on the page
+          where the chromosome should be drawn. This allows multiple
+          chromosomes to be drawn on a single page.
+        - start_y_position, end_y_position - The y positions on the page
+          where the chromosome should be contained.
+
+        Configuration Attributes:
+        - title_size - The size of the chromosome title.
+        - scale_num - A number to scale the drawing by. This is useful if
+          you want to draw multiple chromosomes of different sizes at the
+          same scale. If this is not set, then the chromosome drawing will
+          be scaled by the number of segments in the chromosome (so each
+          chromosome will be the exact same final size).
+
+        """
+        _ChromosomeComponent.__init__(self)
+
+        self._name = chromosome_name
+
+        self.start_x_position = -1
+        self.end_x_position = -1
+        self.start_y_position = -1
+        self.end_y_position = -1
+
+        self.title_size = 20
+        self.scale_num = None
+
+        self.label_size = 6
+        self.chr_percent = 0.25
+        self.label_sep_percent = self.chr_percent * 0.5
+        self._color_labels = False
+
+    def subcomponent_size(self):
+        """Return the scaled size of all subcomponents of this component."""
+        total_sub = 0
+        for sub_component in self._sub_components:
+            total_sub += sub_component.scale
+
+        return total_sub
+
+    def draw(self, cur_drawing):
+        """Draw a chromosome on the specified template.
+
+        Ideally, the x_position and y_*_position attributes should be
+        set prior to drawing -- otherwise we're going to have some problems.
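+
+        In normal use these attributes are set by the parent Organism in
+        its own draw method before this method is called.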
+ """ + for position in ( + self.start_x_position, + self.end_x_position, + self.start_y_position, + self.end_y_position, + ): + assert position != -1, "Need to set drawing coordinates." + + # first draw all of the sub-sections of the chromosome -- this + # will actually be the picture of the chromosome + cur_y_pos = self.start_y_position + if self.scale_num: + y_pos_change = ( + self.start_y_position * 0.95 - self.end_y_position + ) / self.scale_num + elif len(self._sub_components) > 0: + y_pos_change = ( + self.start_y_position * 0.95 - self.end_y_position + ) / self.subcomponent_size() + # no sub_components to draw + else: + pass + + left_labels = [] + right_labels = [] + for sub_component in self._sub_components: + this_y_pos_change = sub_component.scale * y_pos_change + + # set the location of the component to draw + sub_component.start_x_position = self.start_x_position + sub_component.end_x_position = self.end_x_position + sub_component.start_y_position = cur_y_pos + sub_component.end_y_position = cur_y_pos - this_y_pos_change + + # draw the sub component + sub_component._left_labels = [] + sub_component._right_labels = [] + sub_component.draw(cur_drawing) + left_labels += sub_component._left_labels + right_labels += sub_component._right_labels + + # update the position for the next component + cur_y_pos -= this_y_pos_change + + self._draw_labels(cur_drawing, left_labels, right_labels) + self._draw_label(cur_drawing, self._name) + + def _draw_label(self, cur_drawing, label_name): + """Draw a label for the chromosome (PRIVATE).""" + x_position = 0.5 * (self.start_x_position + self.end_x_position) + y_position = self.end_y_position + + label_string = String(x_position, y_position, label_name) + label_string.fontName = "Times-BoldItalic" + label_string.fontSize = self.title_size + label_string.textAnchor = "middle" + + cur_drawing.add(label_string) + + def _draw_labels(self, cur_drawing, left_labels, right_labels): + """Layout and draw sub-feature labels for the chromosome (PRIVATE). + + Tries to place each label at the same vertical position as the + feature it applies to, but will adjust the positions to avoid or + at least reduce label overlap. + + Draws the label text and a coloured line linking it to the + location (i.e. feature) it applies to. + """ + if not self._sub_components: + return + color_label = self._color_labels + + segment_width = (self.end_x_position - self.start_x_position) * self.chr_percent + label_sep = ( + self.end_x_position - self.start_x_position + ) * self.label_sep_percent + segment_x = self.start_x_position + 0.5 * ( + self.end_x_position - self.start_x_position - segment_width + ) + + y_limits = [] + for sub_component in self._sub_components: + y_limits.extend( + (sub_component.start_y_position, sub_component.end_y_position) + ) + y_min = min(y_limits) + y_max = max(y_limits) + del y_limits + # Now do some label placement magic... 
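+        # Each label tuple is (y-position, colour, background colour, text);
+        # _place_labels (defined below) sorts them and uses _spring_layout
+        # to nudge the y positions apart so the text doesn't overlap.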
+ # from reportlab.pdfbase import pdfmetrics + # font = pdfmetrics.getFont('Helvetica') + # h = (font.face.ascent + font.face.descent) * 0.90 + h = self.label_size + for x1, x2, labels, anchor in [ + ( + segment_x, + segment_x - label_sep, + _place_labels(left_labels, y_min, y_max, h), + "end", + ), + ( + segment_x + segment_width, + segment_x + segment_width + label_sep, + _place_labels(right_labels, y_min, y_max, h), + "start", + ), + ]: + for (y1, y2, color, back_color, name) in labels: + cur_drawing.add( + Line(x1, y1, x2, y2, strokeColor=color, strokeWidth=0.25) + ) + label_string = String(x2, y2, name, textAnchor=anchor) + label_string.fontName = "Helvetica" + label_string.fontSize = h + if color_label: + label_string.fillColor = color + if back_color: + w = stringWidth(name, label_string.fontName, label_string.fontSize) + if x1 > x2: + w = w * -1.0 + cur_drawing.add( + Rect( + x2, + y2 - 0.1 * h, + w, + h, + strokeColor=back_color, + fillColor=back_color, + ) + ) + cur_drawing.add(label_string) + + +class ChromosomeSegment(_ChromosomeComponent): + """Draw a segment of a chromosome. + + This class provides the important configurable functionality of drawing + a Chromosome. Each segment has some customization available here, or can + be subclassed to define additional functionality. Most of the interesting + drawing stuff is likely to happen at the ChromosomeSegment level. + """ + + def __init__(self): + """Initialize a ChromosomeSegment. + + Attributes: + - start_x_position, end_x_position - Defines the x range we have + to draw things in. + - start_y_position, end_y_position - Defines the y range we have + to draw things in. + + Configuration Attributes: + - scale - A scaling value for the component. By default this is + set at 1 (ie -- has the same scale as everything else). Higher + values give more size to the component, smaller values give less. + - fill_color - A color to fill in the segment with. Colors are + available in reportlab.lib.colors + - label - A label to place on the chromosome segment. This should + be a text string specifying what is to be included in the label. + - label_size - The size of the label. + - chr_percent - The percentage of area that the chromosome + segment takes up. + + """ + _ChromosomeComponent.__init__(self) + + self.start_x_position = -1 + self.end_x_position = -1 + self.start_y_position = -1 + self.end_y_position = -1 + + # --- attributes for configuration + self.scale = 1 + self.fill_color = None + self.label = None + self.label_size = 6 + self.chr_percent = 0.25 + + def draw(self, cur_drawing): + """Draw a chromosome segment. + + Before drawing, the range we are drawing in needs to be set. + """ + for position in ( + self.start_x_position, + self.end_x_position, + self.start_y_position, + self.end_y_position, + ): + assert position != -1, "Need to set drawing coordinates." + + self._draw_subcomponents(cur_drawing) # Anything behind + self._draw_segment(cur_drawing) + self._overdraw_subcomponents(cur_drawing) # Anything on top + self._draw_label(cur_drawing) + + def _draw_subcomponents(self, cur_drawing): + """Draw any subcomponents of the chromosome segment (PRIVATE). + + This should be overridden in derived classes if there are + subcomponents to be drawn. + """ + pass + + def _draw_segment(self, cur_drawing): + """Draw the current chromosome segment (PRIVATE).""" + # set the coordinates of the segment -- it'll take up the MIDDLE part + # of the space we have. 
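+        # For example (hypothetical numbers), with chr_percent = 0.25 and an
+        # x range of 100..200, the segment is (200 - 100) * 0.25 = 25 units
+        # wide and starts at x = 100 + 0.5 * (100 - 25) = 137.5.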
+ segment_y = self.end_y_position + segment_width = (self.end_x_position - self.start_x_position) * self.chr_percent + segment_height = self.start_y_position - self.end_y_position + segment_x = self.start_x_position + 0.5 * ( + self.end_x_position - self.start_x_position - segment_width + ) + + # first draw the sides of the segment + right_line = Line(segment_x, segment_y, segment_x, segment_y + segment_height) + left_line = Line( + segment_x + segment_width, + segment_y, + segment_x + segment_width, + segment_y + segment_height, + ) + + cur_drawing.add(right_line) + cur_drawing.add(left_line) + + # now draw the box, if it is filled in + if self.fill_color is not None: + fill_rectangle = Rect(segment_x, segment_y, segment_width, segment_height) + fill_rectangle.fillColor = self.fill_color + fill_rectangle.strokeColor = None + + cur_drawing.add(fill_rectangle) + + def _overdraw_subcomponents(self, cur_drawing): + """Draw any subcomponents of the chromosome segment over the main part (PRIVATE). + + This should be overridden in derived classes if there are + subcomponents to be drawn. + """ + pass + + def _draw_label(self, cur_drawing): + """Add a label to the chromosome segment (PRIVATE). + + The label will be applied to the right of the segment. + + This may be overlapped by any sub-feature labels on other segments! + """ + if self.label is not None: + + label_x = 0.5 * (self.start_x_position + self.end_x_position) + ( + self.chr_percent + 0.05 + ) * (self.end_x_position - self.start_x_position) + label_y = ( + self.start_y_position - self.end_y_position + ) / 2 + self.end_y_position + + label_string = String(label_x, label_y, self.label) + label_string.fontName = "Helvetica" + label_string.fontSize = self.label_size + + cur_drawing.add(label_string) + + +def _spring_layout(desired, minimum, maximum, gap=0): + """Try to layout label co-ordinates or other floats (PRIVATE). + + Originally written for the y-axis vertical positioning of labels on a + chromosome diagram (where the minimum gap between y-axis co-ordinates is + the label height), it could also potentially be used for x-axis placement, + or indeed radial placement for circular chromosomes within GenomeDiagram. + + In essence this is an optimisation problem, balancing the desire to have + each label as close as possible to its data point, but also to spread out + the labels to avoid overlaps. This could be described with a cost function + (modelling the label distance from the desired placement, and the inter- + label separations as springs) and solved as a multi-variable minimization + problem - perhaps with NumPy or SciPy. + + For now however, the implementation is a somewhat crude ad hoc algorithm. + + NOTE - This expects the input data to have been sorted! + """ + count = len(desired) + if count <= 1: + return desired # Easy! 
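+    # Worked example (hypothetical numbers): desired = [0.10, 0.12, 0.50]
+    # with minimum=0.0, maximum=1.0 and gap=0.1 -- the first two values sit
+    # closer together than the gap, so the code below must spread them apart.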
+    if minimum >= maximum:
+        raise ValueError("Bad min/max %f and %f" % (minimum, maximum))
+    if min(desired) < minimum or max(desired) > maximum:
+        raise ValueError(
+            "Data %f to %f out of bounds (%f to %f)"
+            % (min(desired), max(desired), minimum, maximum)
+        )
+    equal_step = float(maximum - minimum) / (count - 1)
+
+    if equal_step < gap:
+        import warnings
+        from Bio import BiopythonWarning
+
+        warnings.warn("Too many labels to avoid overlap", BiopythonWarning)
+        # Crudest solution
+        return [minimum + i * equal_step for i in range(count)]
+
+    good = True
+    if gap:
+        # Check each consecutive pair is at least gap apart
+        prev = desired[0]
+        for next in desired[1:]:
+            if next - prev < gap:
+                good = False
+                break
+            prev = next
+    if good:
+        return desired
+
+    span = maximum - minimum
+    for split in [0.5 * span, span / 3.0, 2 * span / 3.0, 0.25 * span, 0.75 * span]:
+        midpoint = minimum + split
+        low = [x for x in desired if x <= midpoint - 0.5 * gap]
+        high = [x for x in desired if x > midpoint + 0.5 * gap]
+        if len(low) + len(high) < count:
+            # Bad split point, points right on boundary
+            continue
+        elif not low and len(high) * gap <= (span - split) + 0.5 * gap:
+            # Give a little of the unused low space to the high points
+            return _spring_layout(high, midpoint + 0.5 * gap, maximum, gap)
+        elif not high and len(low) * gap <= split + 0.5 * gap:
+            # Give a little of the unused high space to the low points
+            return _spring_layout(low, minimum, midpoint - 0.5 * gap, gap)
+        elif (
+            len(low) * gap <= split - 0.5 * gap
+            and len(high) * gap <= (span - split) - 0.5 * gap
+        ):
+            return _spring_layout(
+                low, minimum, midpoint - 0.5 * gap, gap
+            ) + _spring_layout(high, midpoint + 0.5 * gap, maximum, gap)
+
+    # This can be counter-productive now we can split out into the telomere or
+    # spacer-segment's vertical space...
+    # Try not to spread out as far as the min/max unless needed
+    low = min(desired)
+    high = max(desired)
+    if (high - low) / (count - 1) >= gap:
+        # Good, we don't need the full range, and can position the
+        # min and max exactly as well :)
+        equal_step = (high - low) / (count - 1)
+        return [low + i * equal_step for i in range(count)]
+
+    low = 0.5 * (minimum + min(desired))
+    high = 0.5 * (max(desired) + maximum)
+    if (high - low) / (count - 1) >= gap:
+        # Good, we don't need the full range
+        equal_step = (high - low) / (count - 1)
+        return [low + i * equal_step for i in range(count)]
+
+    # Crudest solution
+    return [minimum + i * equal_step for i in range(count)]
+
+
+# assert False, _spring_layout([0.10,0.12,0.13,0.14,0.5,0.75, 1.0], 0, 1, 0.1)
+# assert _spring_layout([0.10,0.12,0.13,0.14,0.5,0.75, 1.0], 0, 1, 0.1) == \
+#        [0.0, 0.125, 0.25, 0.375, 0.5, 0.75, 1.0]
+# assert _spring_layout([0.10,0.12,0.13,0.14,0.5,0.75, 1.0], 0, 1, 0.1) == \
+#        [0.0, 0.16666666666666666, 0.33333333333333331, 0.5,
+#         0.66666666666666663, 0.83333333333333326, 1.0]
+
+
+def _place_labels(desired_etc, minimum, maximum, gap=0):
+    # Want a list of lists/tuples for desired_etc
+    desired_etc.sort()
+    placed = _spring_layout([row[0] for row in desired_etc], minimum, maximum, gap)
+    for old, y2 in zip(desired_etc, placed):
+        # (y1, a, b, c, ..., z) --> (y1, y2, a, b, c, ..., z)
+        yield (old[0], y2) + tuple(old[1:])
+
+
+class AnnotatedChromosomeSegment(ChromosomeSegment):
+    """Annotated chromosome segment.
+
+    This is like the ChromosomeSegment, but accepts a list of features.
+    """
+
+    def __init__(
+        self,
+        bp_length,
+        features,
+        default_feature_color=colors.blue,
+        name_qualifiers=("gene", "label", "name", "locus_tag", "product"),
+    ):
+        """Initialize.
+
+        The features can either be SeqFeature objects, or tuples of values:
+        start (int), end (int), strand (+1, -1, 0 or None), label (string),
+        ReportLab color (string or object), and optional ReportLab fill color.
+
+        Note we require 0 <= start <= end <= bp_length, and within the vertical
+        space allocated to this segment, lines will be placed according to the
+        start/end coordinates (starting from the top).
+
+        Positive strand features are drawn on the right, negative on the left,
+        otherwise all the way across.
+
+        We recommend using consistent units for all the segment's scale values
+        (e.g. their length in base pairs).
+
+        When providing features as SeqFeature objects, the default color
+        is used, unless the feature's qualifiers include an Artemis colour
+        string (functionality also in GenomeDiagram). The caption also follows
+        the GenomeDiagram approach and takes the first qualifier from the list
+        or tuple specified in name_qualifiers.
+
+        Note the additional attribute label_sep_percent, which controls the
+        gap between the segment and its feature labels as a percentage of the
+        horizontal space, by default half of the chr_percent attribute (half
+        of 25%, thus 12.5%).
+
+        """
+        ChromosomeSegment.__init__(self)
+        self.bp_length = bp_length
+        self.features = features
+        self.default_feature_color = default_feature_color
+        self.name_qualifiers = name_qualifiers
+        self.label_sep_percent = self.chr_percent * 0.5
+
+    def _overdraw_subcomponents(self, cur_drawing):
+        """Draw any annotated features on the chromosome segment (PRIVATE).
+
+        Assumes _draw_segment has already been called to fill out the basic
+        shape, and assumes that it uses the same boundaries.
+        """
+        # set the coordinates of the segment -- it'll take up the MIDDLE part
+        # of the space we have.
+        segment_y = self.end_y_position
+        segment_width = (self.end_x_position - self.start_x_position) * self.chr_percent
+        label_sep = (
+            self.end_x_position - self.start_x_position
+        ) * self.label_sep_percent
+        segment_height = self.start_y_position - self.end_y_position
+        segment_x = self.start_x_position + 0.5 * (
+            self.end_x_position - self.start_x_position - segment_width
+        )
+
+        left_labels = []
+        right_labels = []
+        for f in self.features:
+            try:
+                # Assume SeqFeature objects
+                start = f.location.start
+                end = f.location.end
+                strand = f.strand
+                try:
+                    # Handles Artemis colour integers, HTML colors, etc
+                    color = _color_trans.translate(f.qualifiers["color"][0])
+                except Exception:  # TODO: ValueError?
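+                    # No usable colour qualifier -- fall back to the default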
+                    color = self.default_feature_color
+                fill_color = color
+                name = ""
+                for qualifier in self.name_qualifiers:
+                    if qualifier in f.qualifiers:
+                        name = f.qualifiers[qualifier][0]
+                        break
+            except AttributeError:
+                # Assume tuple of ints, string, and color
+                start, end, strand, name, color = f[:5]
+                color = _color_trans.translate(color)
+                if len(f) > 5:
+                    fill_color = _color_trans.translate(f[5])
+                else:
+                    fill_color = color
+            assert 0 <= start <= end <= self.bp_length
+            if strand == +1:
+                # Right side only
+                x = segment_x + segment_width * 0.6
+                w = segment_width * 0.4
+            elif strand == -1:
+                # Left side only
+                x = segment_x
+                w = segment_width * 0.4
+            else:
+                # Both or neither - full width
+                x = segment_x
+                w = segment_width
+            local_scale = segment_height / self.bp_length
+            # Note the negative height draws the box downwards from the
+            # feature's start position
+            fill_rectangle = Rect(
+                x,
+                segment_y + segment_height - local_scale * start,
+                w,
+                local_scale * (start - end),
+            )
+            fill_rectangle.fillColor = fill_color
+            fill_rectangle.strokeColor = color
+            cur_drawing.add(fill_rectangle)
+            if name:
+                if fill_color == color:
+                    back_color = None
+                else:
+                    back_color = fill_color
+                value = (
+                    segment_y + segment_height - local_scale * start,
+                    color,
+                    back_color,
+                    name,
+                )
+                if strand == -1:
+                    self._left_labels.append(value)
+                else:
+                    self._right_labels.append(value)
+
+
+class TelomereSegment(ChromosomeSegment):
+    """A segment that is located at the end of a linear chromosome.
+
+    This is just like a regular segment, but it draws the end of a chromosome
+    which is represented by a half circle. This just overrides the
+    _draw_segment method of ChromosomeSegment to provide that specialized
+    drawing.
+    """
+
+    def __init__(self, inverted=0):
+        """Initialize a segment at the end of a chromosome.
+
+        See ChromosomeSegment for all of the attributes that can be
+        customized in a TelomereSegment.
+
+        Arguments:
+        - inverted -- Whether or not the telomere should be inverted
+          (i.e. drawn on the bottom of a chromosome)
+
+        """
+        ChromosomeSegment.__init__(self)
+
+        self._inverted = inverted
+
+    def _draw_segment(self, cur_drawing):
+        """Draw a half circle representing the end of a linear chromosome (PRIVATE)."""
+        # set the coordinates of the segment -- it'll take up the MIDDLE part
+        # of the space we have.
+        width = (self.end_x_position - self.start_x_position) * self.chr_percent
+        height = self.start_y_position - self.end_y_position
+        center_x = 0.5 * (self.end_x_position + self.start_x_position)
+        start_x = center_x - 0.5 * width
+        if self._inverted:
+            center_y = self.start_y_position
+            start_angle = 180
+            end_angle = 360
+        else:
+            center_y = self.end_y_position
+            start_angle = 0
+            end_angle = 180
+
+        cap_wedge = Wedge(center_x, center_y, width / 2, start_angle, end_angle, height)
+        cap_wedge.strokeColor = None
+        cap_wedge.fillColor = self.fill_color
+        cur_drawing.add(cap_wedge)
+
+        # Now draw an arc for the curved edge of the wedge,
+        # omitting the flat end.
+        cap_arc = ArcPath()
+        cap_arc.addArc(center_x, center_y, width / 2, start_angle, end_angle, height)
+        cur_drawing.add(cap_arc)
+
+
+class SpacerSegment(ChromosomeSegment):
+    """A blank spacer segment for a chromosome.
+
+    Doesn't draw anything, just empty space which can be helpful
+    for layout purposes (e.g. making room for feature labels).
+    """
+
+    def draw(self, cur_diagram):
+        """Draw nothing to the current diagram (dummy method).
+
+        The segment spacer has no actual image in the diagram, so this
+        method does nothing; it is defined only to match the expected API
+        of the other segment objects.
+        """
+        pass
diff --git a/code/lib/Bio/Graphics/ColorSpiral.py b/code/lib/Bio/Graphics/ColorSpiral.py
new file mode 100644
index 0000000..c113b7a
--- /dev/null
+++ b/code/lib/Bio/Graphics/ColorSpiral.py
@@ -0,0 +1,206 @@
+# Copyright 2012 by Leighton Pritchard. All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Generate RGB colours suitable for distinguishing categorical data.
+
+This module provides a class that implements a spiral 'path' through HSV
+colour space, permitting the selection of a number of points along that path,
+and returning the output in RGB colour space, suitable for use with ReportLab
+and other graphics packages.
+
+This approach to colour choice was inspired by Bang Wong's Points of View
+article: Color Coding, in Nature Methods _7_ 573 (https://doi.org/10.1038/nmeth0810-573).
+
+The module also provides helper functions that return a list of colours, or
+a dictionary of colours (if passed an iterable containing the names of
+categories to be coloured).
+"""
+
+# standard library
+import colorsys  # colour format conversions
+from math import log, exp, floor, pi
+import random  # for jitter values
+
+
+class ColorSpiral:
+    """Implement a spiral path through HSV colour space.
+
+    This class provides functions for sampling points along a logarithmic
+    spiral path through HSV colour space.
+
+    The spiral is described by r = a * exp(b * t) where r is the distance
+    from the axis of the HSV cylinder to the current point in the spiral,
+    and t is the angle through which the spiral has turned to reach the
+    current point. a and b are (positive, real) parameters that control the
+    shape of the spiral.
+
+    - a: the starting direction of the spiral
+    - b: the number of revolutions about the axis made by the spiral
+
+    We permit the spiral to move along the cylinder ('in V-space') between
+    v_init and v_final, to give a gradation in V (essentially, brightness),
+    along the path, where v_init, v_final are in [0,1].
+
+    A brightness 'jitter' may also be provided as an absolute value in
+    V-space, to aid in distinguishing consecutive colour points on the
+    path.
+    """
+
+    def __init__(self, a=1, b=0.33, v_init=0.85, v_final=0.5, jitter=0.05):
+        """Initialize a logarithmic spiral path through HSV colour space.
+
+        Arguments:
+        - a - Parameter a for the spiral, controls the initial spiral
+          direction. a > 0
+        - b - parameter b for the spiral, controls the rate at which the
+          spiral revolves around the axis. b > 0
+        - v_init - initial value of V (brightness) for the spiral.
+          v_init in [0,1]
+        - v_final - final value of V (brightness) for the spiral.
+          v_final in [0,1]
+        - jitter - the degree of V (brightness) jitter to add to each
+          selected colour. The amount of jitter will be selected
+          from a uniform random distribution [-jitter, jitter],
+          and V will be maintained in [0,1].
+
+        """
+        # Initialize attributes
+        self.a = a
+        self.b = b
+        self.v_init = v_init
+        self.v_final = v_final
+        self.jitter = jitter
+
+    def get_colors(self, k, offset=0.1):
+        """Generate k different RGB colours evenly spaced on the spiral.
+ + A generator returning the RGB colour space values for k + evenly-spaced points along the defined spiral in HSV space. + + Arguments: + - k - the number of points to return + - offset - how far along the spiral path to start. + + """ + # We use the offset to skip a number of similar colours near to HSV axis + assert offset > 0 and offset < 1, "offset must be in (0,1)" + v_rate = (self._v_final - self._v_init) / float(k) + # Generator for colours: we have divided the arc length into sections + # of equal length, and step along them + for n in range(1, k + 1): + # For each value of n, t indicates the angle through which the + # spiral has turned, to this point + t = (1.0 / self._b) * ( + log(n + (k * offset)) - log((1 + offset) * k * self._a) + ) + # Put 0 <= h <= 2*pi, where h is the angular part of the polar + # co-ordinates for this point on the spiral + h = t + while h < 0: + h += 2 * pi + h = h - (floor(h / (2 * pi)) * pi) + # Now put h in [0, 1] for colorsys conversion + h = h / (2 * pi) + # r is the radial distance of this point from the centre + r = self._a * exp(self._b * t) + # v is the brightness of this point, linearly interpolated + # from self._v_init to self._v_final. Jitter size is sampled from + # a uniform distribution + if self._jitter: + jitter = random.random() * 2 * self._jitter - self._jitter + else: + jitter = 0 + v = self._v_init + (n * v_rate + jitter) + # We have arranged the arithmetic such that 0 <= r <= 1, so + # we can use this value directly as s in HSV + yield colorsys.hsv_to_rgb(h, r, max(0, min(v, 1))) + + def _get_a(self): + return self._a + + def _set_a(self, value): + self._a = max(0, value) + + def _get_b(self): + return self._b + + def _set_b(self, value): + self._b = max(0, value) + + def _get_v_init(self): + return self._v_init + + def _set_v_init(self, value): + self._v_init = max(0, min(1, value)) + + def _get_v_final(self): + return self._v_final + + def _set_v_final(self, value): + self._v_final = max(0, min(1, value)) + + def _get_jitter(self): + return self._jitter + + def _set_jitter(self, value): + self._jitter = max(0, min(1, value)) + + a = property( + _get_a, _set_a, doc="Parameter controlling initial spiral direction (a > 0)" + ) + b = property( + _get_b, + _set_b, + doc="Parameter controlling rate spiral revolves around axis (b > 0)", + ) + v_init = property( + _get_v_init, + _set_v_init, + doc="Initial value of V (brightness) for the spiral (range 0 to 1)", + ) + v_final = property( + _get_v_final, + _set_v_final, + doc="Final value of V (brightness) for the spiral (range 0 to 1)", + ) + jitter = property( + _get_jitter, + _set_jitter, + doc="Degree of V (brightness) jitter to add to each color (range 0 to 1)", + ) + + +# Convenience functions for those who don't want to bother with a +# ColorSpiral object +def get_colors(k, **kwargs): + """Return k colours selected by the ColorSpiral object, as a generator. + + Arguments: + - k - the number of colours to return + - kwargs - pass-through arguments to the ColorSpiral object + + """ + cs = ColorSpiral(**kwargs) + return cs.get_colors(k) + + +def get_color_dict(l, **kwargs): + """Return a dictionary of colours using the provided values as keys. + + Returns a dictionary, keyed by the members of iterable l, with a + colour assigned to each member. 
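+
+    For example (exact RGB values will vary with the spiral settings)::
+
+        color_dict = get_color_dict(["cat1", "cat2", "cat3"])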
+
+    Arguments:
+    - l - an iterable representing classes to be coloured
+    - kwargs - pass-through arguments to the ColorSpiral object
+
+    """
+    cs = ColorSpiral(**kwargs)
+    colors = cs.get_colors(len(l))
+    color_dict = {}
+    for item in l:
+        color_dict[item] = next(colors)
+    return color_dict
diff --git a/code/lib/Bio/Graphics/Comparative.py b/code/lib/Bio/Graphics/Comparative.py
new file mode 100644
index 0000000..35bc192
--- /dev/null
+++ b/code/lib/Bio/Graphics/Comparative.py
@@ -0,0 +1,178 @@
+# Copyright 2001 by Brad Chapman. All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Plots to compare information between different sources.
+
+This file contains high level plots which are designed to be used to
+compare different types of information. The most basic example is comparing
+two variables in a traditional scatter plot.
+"""
+# reportlab
+from reportlab.lib import colors
+from reportlab.graphics.charts.lineplots import LinePlot
+from reportlab.lib.pagesizes import letter
+from reportlab.lib.units import inch
+
+from reportlab.graphics.shapes import Drawing, String
+from reportlab.graphics.charts.markers import makeEmptySquare, makeFilledSquare
+from reportlab.graphics.charts.markers import makeFilledDiamond, makeSmiley
+from reportlab.graphics.charts.markers import makeFilledCircle, makeEmptyCircle
+
+from Bio.Graphics import _write
+
+
+class ComparativeScatterPlot:
+    """Display a scatter-type plot comparing two different kinds of info.
+
+    Attributes:
+    - display_info - a 2D list of the information we'll be outputting. Each
+      top level list is a different data type, and each data point is a
+      two-tuple of the coordinates of a point.
+
+    So if you had two distributions of points, it should look like::
+
+        display_info = [[(1, 2), (3, 4)],
+                        [(5, 6), (7, 8)]]
+
+    If everything is just one set of points, display_info can look like::
+
+        display_info = [[(1, 2), (3, 4), (5, 6)]]
+
+    """
+
+    def __init__(self, output_format="pdf"):
+        """Initialize the class."""
+        # customizable attributes
+        self.number_of_columns = 1
+        self.page_size = letter
+        self.title_size = 20
+
+        self.output_format = output_format
+
+        # the information we'll be writing
+        self.display_info = []
+
+        # initial colors and shapes used for drawing points
+        self.color_choices = [
+            colors.red,
+            colors.green,
+            colors.blue,
+            colors.yellow,
+            colors.orange,
+            colors.black,
+        ]
+        self.shape_choices = [
+            makeFilledCircle,
+            makeEmptySquare,
+            makeFilledDiamond,
+            makeFilledSquare,
+            makeEmptyCircle,
+            makeSmiley,
+        ]
+
+    def draw_to_file(self, output_file, title):
+        """Write the comparative plot to a file.
+
+        Arguments:
+        - output_file - The name of the file to output the information to,
+          or a handle to write to.
+        - title - A title to display on the graphic.
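+
+        A minimal usage sketch (file name hypothetical)::
+
+            plot = ComparativeScatterPlot()
+            plot.display_info = [[(1, 2), (3, 4), (5, 6)]]
+            plot.draw_to_file("scatter.pdf", "Example comparison")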
+ + """ + width, height = self.page_size + cur_drawing = Drawing(width, height) + + self._draw_title(cur_drawing, title, width, height) + + start_x = inch * 0.5 + end_x = width - inch * 0.5 + end_y = height - 1.5 * inch + start_y = 0.5 * inch + self._draw_scatter_plot(cur_drawing, start_x, start_y, end_x, end_y) + + return _write(cur_drawing, output_file, self.output_format) + + def _draw_title(self, cur_drawing, title, width, height): + """Add a title to the page we are outputting (PRIVATE).""" + title_string = String(width / 2, height - inch, title) + title_string.fontName = "Helvetica-Bold" + title_string.fontSize = self.title_size + title_string.textAnchor = "middle" + + cur_drawing.add(title_string) + + def _draw_scatter_plot(self, cur_drawing, x_start, y_start, x_end, y_end): + """Draw a scatter plot on the drawing with the given coordinates (PRIVATE).""" + scatter_plot = LinePlot() + + # set the dimensions of the scatter plot + scatter_plot.x = x_start + scatter_plot.y = y_start + scatter_plot.width = abs(x_start - x_end) + scatter_plot.height = abs(y_start - y_end) + + scatter_plot.data = self.display_info + + scatter_plot.joinedLines = 0 + + # set the axes of the plot + x_min, x_max, y_min, y_max = self._find_min_max(self.display_info) + scatter_plot.xValueAxis.valueMin = x_min + scatter_plot.xValueAxis.valueMax = x_max + scatter_plot.xValueAxis.valueStep = (x_max - x_min) / 10.0 + + scatter_plot.yValueAxis.valueMin = y_min + scatter_plot.yValueAxis.valueMax = y_max + scatter_plot.yValueAxis.valueStep = (y_max - y_min) / 10.0 + + self._set_colors_and_shapes(scatter_plot, self.display_info) + + cur_drawing.add(scatter_plot) + + def _set_colors_and_shapes(self, scatter_plot, display_info): + """Set the colors and shapes of the points displayed (PRIVATE). + + By default this just sets all of the points according to the order + of colors and shapes defined in self.color_choices and + self.shape_choices. The first 5 shapes and colors are unique, the + rest of them are just set to the same color and shape (since I + ran out of shapes!). + + You can change how this function works by either changing the + values of the color_choices and shape_choices attributes, or + by inheriting from this class and overriding this function. + """ + for value_num in range(len(display_info)): + # if we have unique colors, add them + if (value_num + 1) < len(self.color_choices): + scatter_plot.lines[value_num].strokeColor = self.color_choices[ + value_num + ] + scatter_plot.lines[value_num].symbol = self.shape_choices[value_num] + # otherwise just use the last number + else: + scatter_plot.lines[value_num].strokeColor = self.color_choices[-1] + scatter_plot.lines[value_num].symbol = self.shape_choices[-1] + + def _find_min_max(self, info): + """Find min and max for x and y coordinates in the given data (PRIVATE).""" + x_min = info[0][0][0] + x_max = info[0][0][0] + y_min = info[0][0][1] + y_max = info[0][0][1] + + for two_d_list in info: + for x, y in two_d_list: + if x > x_max: + x_max = x + if x < x_min: + x_min = x + if y > y_max: + y_max = y + if y < y_min: + y_min = y + + return x_min, x_max, y_min, y_max diff --git a/code/lib/Bio/Graphics/DisplayRepresentation.py b/code/lib/Bio/Graphics/DisplayRepresentation.py new file mode 100644 index 0000000..df75283 --- /dev/null +++ b/code/lib/Bio/Graphics/DisplayRepresentation.py @@ -0,0 +1,187 @@ +# Copyright 2001 by Brad Chapman. All rights reserved. 
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Represent information for graphical display.
+
+Classes in this module are designed to hold information in a way that
+makes it easy to draw graphical figures.
+"""
+# reportlab
+from reportlab.lib import colors
+
+# local stuff
+from Bio.Graphics.BasicChromosome import ChromosomeSegment
+from Bio.Graphics.BasicChromosome import TelomereSegment
+
+
+# --- constants
+# This is a default color scheme based on the light spectrum.
+# Based on my vague recollections from biology, this is our friend ROY G. BIV
+RAINBOW_COLORS = {
+    (1, 1): colors.violet,
+    (2, 2): colors.indigo,
+    (3, 3): colors.blue,
+    (4, 4): colors.green,
+    (5, 5): colors.yellow,
+    (6, 6): colors.orange,
+    (7, 20): colors.red,
+}
+
+
+class ChromosomeCounts:
+    """Represent a chromosome with count information.
+
+    This is used to display information about counts along a chromosome.
+    The segments are expected to have different count information, which
+    will be displayed using a color scheme.
+
+    I envision using this class when you think that certain regions of
+    the chromosome will be especially abundant in the counts, and you
+    want to pick those out.
+    """
+
+    def __init__(self, segment_names, color_scheme=RAINBOW_COLORS):
+        """Initialize a representation of chromosome counts.
+
+        Arguments:
+         - segment_names - An ordered list of all segment names along
+           the chromosome. The count and other information will be added
+           to these.
+         - color_scheme - A coloring scheme to use in the counts. This
+           should be a dictionary mapping count ranges to colors (specified
+           in reportlab.lib.colors).
+
+        """
+        self._names = segment_names
+        self._count_info = {}
+        self._label_info = {}
+        self._scale_info = {}
+        for name in self._names:
+            self._count_info[name] = 0
+            self._label_info[name] = None
+            self._scale_info[name] = 1
+
+        self._color_scheme = color_scheme
+
+    def add_count(self, segment_name, count=1):
+        """Add counts to the given segment name.
+
+        Arguments:
+         - segment_name - The name of the segment we should add counts to.
+           If the name is not present, a KeyError will be raised.
+         - count - The counts to add to the current segment. This defaults
+           to a single count.
+
+        """
+        try:
+            self._count_info[segment_name] += count
+        except KeyError:
+            raise KeyError("Segment name %s not found." % segment_name) from None
+
+    def scale_segment_value(self, segment_name, scale_value=None):
+        """Divide the counts for a segment by some kind of scale value.
+
+        This is useful if segments aren't represented by raw counts, but
+        are instead counts divided by some number.
+        """
+        try:
+            self._count_info[segment_name] = float(
+                self._count_info[segment_name]
+            ) / float(scale_value)
+        except KeyError:
+            raise KeyError("Segment name %s not found." % segment_name) from None
+
+    def add_label(self, segment_name, label):
+        """Add a label to a specific segment.
+
+        Raises a KeyError if the specified segment name is not found.
+        """
+        if segment_name in self._label_info:
+            self._label_info[segment_name] = label
+        else:
+            raise KeyError("Segment name %s not found." % segment_name)
+
+    def set_scale(self, segment_name, scale):
+        """Set the scale for a specific chromosome segment.
+
+        By default all segments have the same scale -- this allows scaling
+        by the size of the segment.
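+
+        For example, counts.set_scale("seg1", 2.0) would draw that segment
+        at twice the default scale (the segment name is illustrative).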
+
+        Raises a KeyError if the specified segment name is not found.
+        """
+        if segment_name in self._scale_info:
+            self._scale_info[segment_name] = scale
+        else:
+            raise KeyError("Segment name %s not found." % segment_name)
+
+    def get_segment_info(self):
+        """Retrieve the color and label info about the segments.
+
+        Returns a list consisting of two-tuples specifying the counts and
+        label name for each segment. The list is ordered according to the
+        original listing of names. Labels are set as None if no label
+        was specified.
+        """
+        order_info = []
+
+        for seg_name in self._names:
+            order_info.append((self._count_info[seg_name], self._label_info[seg_name]))
+
+        return order_info
+
+    def fill_chromosome(self, chromosome):
+        """Add the collected segment information to a chromosome for drawing.
+
+        Arguments:
+         - chromosome - A Chromosome graphics object that we can add
+           chromosome segments to.
+
+        This creates ChromosomeSegment (and TelomereSegment) objects to
+        fill in the chromosome. The information is derived from the
+        label and count information, with counts transformed to the
+        specified color map.
+
+        Returns the chromosome with all of the segments added.
+        """
+        for seg_num in range(len(self._names)):
+            is_end_segment = 0
+            # make the top and bottom telomeres
+            if seg_num == 0:
+                cur_segment = TelomereSegment()
+                is_end_segment = 1
+            elif seg_num == len(self._names) - 1:
+                cur_segment = TelomereSegment(1)
+                is_end_segment = 1
+            # otherwise, they are just regular segments
+            else:
+                cur_segment = ChromosomeSegment()
+
+            seg_name = self._names[seg_num]
+            if self._count_info[seg_name] > 0:
+                color = self._color_from_count(self._count_info[seg_name])
+                cur_segment.fill_color = color
+
+            if self._label_info[seg_name] is not None:
+                cur_segment.label = self._label_info[seg_name]
+
+            # give end segments extra size so they look right
+            if is_end_segment:
+                cur_segment.scale = 3
+            else:
+                cur_segment.scale = self._scale_info[seg_name]
+
+            chromosome.add(cur_segment)
+
+        return chromosome
+
+    def _color_from_count(self, count):
+        """Translate the given count into a color using the color scheme (PRIVATE)."""
+        for count_start, count_end in self._color_scheme:
+            if count >= count_start and count <= count_end:
+                return self._color_scheme[(count_start, count_end)]
+
+        # if we got here we didn't find a color for the count
+        raise ValueError("Count value %s was not found in the color scheme." % count)
diff --git a/code/lib/Bio/Graphics/Distribution.py b/code/lib/Bio/Graphics/Distribution.py
new file mode 100644
index 0000000..3bfb065
--- /dev/null
+++ b/code/lib/Bio/Graphics/Distribution.py
@@ -0,0 +1,258 @@
+# Copyright 2001 by Brad Chapman. All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Display information distributed across a Chromosome-like object.
+
+These classes are meant to show the distribution of some kind of information
+as it changes across any kind of segment. It was designed with chromosome
+distributions in mind, but could also work for chromosome regions, BAC clones
+or anything similar.
+
+Reportlab is used for producing the graphical output.
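+
+A minimal usage sketch (the file name and data are illustrative)::
+
+    page = DistributionPage()
+    page.distributions.append(BarChartDistribution([[1, 2, 3], [4, 5, 6]]))
+    page.draw("distribution.pdf", "Sample distribution")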
+""" +# standard library +import math + +# reportlab +from reportlab.lib.pagesizes import letter +from reportlab.lib.units import inch +from reportlab.lib import colors + +from reportlab.graphics.shapes import Drawing, String +from reportlab.graphics.charts.barcharts import VerticalBarChart +from reportlab.graphics.charts.barcharts import BarChartProperties +from reportlab.graphics.widgetbase import TypedPropertyCollection + +from Bio.Graphics import _write + + +class DistributionPage: + """Display a grouping of distributions on a page. + + This organizes Distributions, and will display them nicely + on a single page. + """ + + def __init__(self, output_format="pdf"): + """Initialize the class.""" + self.distributions = [] + + # customizable attributes + self.number_of_columns = 1 + self.page_size = letter + self.title_size = 20 + + self.output_format = output_format + + def draw(self, output_file, title): + """Draw out the distribution information. + + Arguments: + - output_file - The name of the file to output the information to, + or a handle to write to. + - title - A title to display on the graphic. + + """ + width, height = self.page_size + cur_drawing = Drawing(width, height) + + self._draw_title(cur_drawing, title, width, height) + + # calculate the x and y position changes for each distribution + cur_x_pos = inch * 0.5 + end_x_pos = width - inch * 0.5 + cur_y_pos = height - 1.5 * inch + end_y_pos = 0.5 * inch + x_pos_change = (end_x_pos - cur_x_pos) / float(self.number_of_columns) + num_y_rows = math.ceil( + float(len(self.distributions)) / float(self.number_of_columns) + ) + y_pos_change = (cur_y_pos - end_y_pos) / num_y_rows + + self._draw_distributions( + cur_drawing, cur_x_pos, x_pos_change, cur_y_pos, y_pos_change, num_y_rows + ) + self._draw_legend(cur_drawing, 2.5 * inch, width) + + return _write(cur_drawing, output_file, self.output_format) + + def _draw_title(self, cur_drawing, title, width, height): + """Add the title of the figure to the drawing (PRIVATE).""" + title_string = String(width / 2, height - inch, title) + title_string.fontName = "Helvetica-Bold" + title_string.fontSize = self.title_size + title_string.textAnchor = "middle" + + cur_drawing.add(title_string) + + def _draw_distributions( + self, + cur_drawing, + start_x_pos, + x_pos_change, + start_y_pos, + y_pos_change, + num_y_drawings, + ): + """Draw all of the distributions on the page (PRIVATE). + + Arguments: + - cur_drawing - The drawing we are working with. + - start_x_pos - The x position on the page to start drawing at. + - x_pos_change - The change in x position between each figure. + - start_y_pos - The y position on the page to start drawing at. + - y_pos_change - The change in y position between each figure. + - num_y_drawings - The number of drawings we'll have in the y + (up/down) direction. 
+ + """ + for y_drawing in range(int(num_y_drawings)): + # if we are on the last y position, we may not be able + # to fill all of the x columns + if (y_drawing + 1) * self.number_of_columns > len(self.distributions): + num_x_drawings = ( + len(self.distributions) - y_drawing * self.number_of_columns + ) + else: + num_x_drawings = self.number_of_columns + for x_drawing in range(num_x_drawings): + dist_num = y_drawing * self.number_of_columns + x_drawing + cur_distribution = self.distributions[dist_num] + + # find the x and y boundaries of the distribution + x_pos = start_x_pos + x_drawing * x_pos_change + end_x_pos = x_pos + x_pos_change + end_y_pos = start_y_pos - y_drawing * y_pos_change + y_pos = end_y_pos - y_pos_change + + # draw the distribution + cur_distribution.draw(cur_drawing, x_pos, y_pos, end_x_pos, end_y_pos) + + def _draw_legend(self, cur_drawing, start_y, width): + """Add a legend to the figure (PRIVATE). + + Subclasses can implement to provide a specialized legend. + """ + pass + + +class BarChartDistribution: + """Display the distribution of values as a bunch of bars.""" + + def __init__(self, display_info=None): + """Initialize a Bar Chart display of distribution info. + + Attributes: + - display_info - the information to be displayed in the distribution. + This should be ordered as a list of lists, where each internal list + is a data set to display in the bar chart. + + """ + if display_info is None: + display_info = [] + self.display_info = display_info + + self.x_axis_title = "" + self.y_axis_title = "" + self.chart_title = "" + self.chart_title_size = 10 + + self.padding_percent = 0.15 + + def draw(self, cur_drawing, start_x, start_y, end_x, end_y): + """Draw a bar chart with the info in the specified range.""" + bar_chart = VerticalBarChart() + if self.chart_title: + self._draw_title( + cur_drawing, self.chart_title, start_x, start_y, end_x, end_y + ) + # set the position of the bar chart + x_start, x_end, y_start, y_end = self._determine_position( + start_x, start_y, end_x, end_y + ) + + bar_chart.x = x_start + bar_chart.y = y_start + bar_chart.width = abs(x_start - x_end) + bar_chart.height = abs(y_start - y_end) + + # set the information in the bar chart + bar_chart.data = self.display_info + bar_chart.valueAxis.valueMin = min(self.display_info[0]) + bar_chart.valueAxis.valueMax = max(self.display_info[0]) + for data_set in self.display_info[1:]: + if min(data_set) < bar_chart.valueAxis.valueMin: + bar_chart.valueAxis.valueMin = min(data_set) + if max(data_set) > bar_chart.valueAxis.valueMax: + bar_chart.valueAxis.valueMax = max(data_set) + + # set other formatting options + if len(self.display_info) == 1: + bar_chart.groupSpacing = 0 + style = TypedPropertyCollection(BarChartProperties) + style.strokeWidth = 0 + style.strokeColor = colors.green + style[0].fillColor = colors.green + + bar_chart.bars = style + + # set the labels + # XXX labels don't work yet + # bar_chart.valueAxis.title = self.x_axis_title + # bar_chart.categoryAxis.title = self.y_axis_title + + cur_drawing.add(bar_chart) + + def _draw_title(self, cur_drawing, title, start_x, start_y, end_x, end_y): + """Add the title of the figure to the drawing (PRIVATE).""" + x_center = start_x + (end_x - start_x) / 2 + y_pos = end_y + (self.padding_percent * (start_y - end_y)) / 2 + title_string = String(x_center, y_pos, title) + title_string.fontName = "Helvetica-Bold" + title_string.fontSize = self.chart_title_size + title_string.textAnchor = "middle" + + cur_drawing.add(title_string) + + def 
_determine_position(self, start_x, start_y, end_x, end_y): + """Calculate the position of the chart with blank space (PRIVATE). + + This uses some padding around the chart, and takes into account + whether the chart has a title. It returns 4 values, which are, + in order, the x_start, x_end, y_start and y_end of the chart + itself. + """ + x_padding = self.padding_percent * (end_x - start_x) + y_padding = self.padding_percent * (start_y - end_y) + + new_x_start = start_x + x_padding + new_x_end = end_x - x_padding + + if self.chart_title: + new_y_start = start_y - y_padding - self.chart_title_size + else: + new_y_start = start_y - y_padding + + new_y_end = end_y + y_padding + + return new_x_start, new_x_end, new_y_start, new_y_end + + +class LineDistribution: + """Display the distribution of values as connected lines. + + This distribution displays the change in values across the object as + lines. This also allows multiple distributions to be displayed on a + single graph. + """ + + def __init__(self): + """Initialize the class.""" + pass + + def draw(self, cur_drawing, start_x, start_y, end_x, end_y): + """Draw a line distribution into the current drawing.""" + pass diff --git a/code/lib/Bio/Graphics/GenomeDiagram/_AbstractDrawer.py b/code/lib/Bio/Graphics/GenomeDiagram/_AbstractDrawer.py new file mode 100644 index 0000000..4e97e36 --- /dev/null +++ b/code/lib/Bio/Graphics/GenomeDiagram/_AbstractDrawer.py @@ -0,0 +1,565 @@ +# Copyright 2003-2008 by Leighton Pritchard. All rights reserved. +# Revisions copyright 2008-2017 by Peter Cock. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +# +# Contact: Leighton Pritchard, The James Hutton Institute, +# Invergowrie, Dundee, Scotland, DD2 5DA, UK +# Leighton.Pritchard@hutton.ac.uk +################################################################################ + +"""AbstractDrawer module (considered to be a private module, the API may change!). + +Provides: + - AbstractDrawer - Superclass for methods common to the Drawer objects + - page_sizes - Method that returns a ReportLab pagesize when passed + a valid ISO size + - draw_box - Method that returns a closed path object when passed + the proper co-ordinates. For HORIZONTAL boxes only. + - angle2trig - Method that returns a tuple of values that are the + vector for rotating a point through a passed angle, + about an origin + - intermediate_points - Method that returns a list of values intermediate + between the points in a passed dataset + +For drawing capabilities, this module uses reportlab to draw and write +the diagram: http://www.reportlab.com + +For dealing with biological information, the package expects Biopython objects +like SeqFeatures. +""" + +# ReportLab imports + +from reportlab.lib import pagesizes +from reportlab.lib import colors +from reportlab.graphics.shapes import Polygon + +from math import pi, sin, cos +from itertools import islice + +################################################################################ +# METHODS +################################################################################ + + +# Utility method to translate strings to ISO page sizes +def page_sizes(size): + """Convert size string into a Reportlab pagesize. 
+ + Arguments: + - size - A string representing a standard page size, eg 'A4' or 'LETTER' + + """ + sizes = { # ReportLab pagesizes, keyed by ISO string + "A0": pagesizes.A0, + "A1": pagesizes.A1, + "A2": pagesizes.A2, + "A3": pagesizes.A3, + "A4": pagesizes.A4, + "A5": pagesizes.A5, + "A6": pagesizes.A6, + "B0": pagesizes.B0, + "B1": pagesizes.B1, + "B2": pagesizes.B2, + "B3": pagesizes.B3, + "B4": pagesizes.B4, + "B5": pagesizes.B5, + "B6": pagesizes.B6, + "ELEVENSEVENTEEN": pagesizes.ELEVENSEVENTEEN, + "LEGAL": pagesizes.LEGAL, + "LETTER": pagesizes.LETTER, + } + try: + return sizes[size] + except KeyError: + raise ValueError("%s not in list of page sizes" % size) from None + + +def _stroke_and_fill_colors(color, border): + """Deal with border and fill colors (PRIVATE).""" + if not isinstance(color, colors.Color): + raise ValueError("Invalid color %r" % color) + + if color == colors.white and border is None: + # Force black border on white boxes with undefined border + strokecolor = colors.black + elif border is None: + strokecolor = color # use fill color + elif border: + if not isinstance(border, colors.Color): + raise ValueError("Invalid border color %r" % border) + strokecolor = border + else: + # e.g. False + strokecolor = None + + return strokecolor, color + + +def draw_box( + point1, point2, color=colors.lightgreen, border=None, colour=None, **kwargs +): + """Draw a box. + + Arguments: + - point1, point2 - coordinates for opposite corners of the box + (x,y tuples) + - color /colour - The color for the box (colour takes priority + over color) + - border - Border color for the box + + Returns a closed path object, beginning at (x1,y1) going round + the four points in order, and filling with the passed color. + """ + x1, y1 = point1 + x2, y2 = point2 + + # Let the UK spelling (colour) override the USA spelling (color) + if colour is not None: + color = colour + del colour + + strokecolor, color = _stroke_and_fill_colors(color, border) + + x1, y1, x2, y2 = min(x1, x2), min(y1, y2), max(x1, x2), max(y1, y2) + return Polygon( + [x1, y1, x2, y1, x2, y2, x1, y2], + strokeColor=strokecolor, + fillColor=color, + strokewidth=0, + **kwargs + ) + + +def draw_cut_corner_box( + point1, point2, corner=0.5, color=colors.lightgreen, border=None, **kwargs +): + """Draw a box with the corners cut off.""" + x1, y1 = point1 + x2, y2 = point2 + + if not corner: + return draw_box(point1, point2, color, border) + elif corner < 0: + raise ValueError("Arrow head length ratio should be positive") + + strokecolor, color = _stroke_and_fill_colors(color, border) + + boxheight = y2 - y1 + boxwidth = x2 - x1 + x_corner = min(boxheight * 0.5 * corner, boxwidth * 0.5) + y_corner = min(boxheight * 0.5 * corner, boxheight * 0.5) + + points = [ + x1, + y1 + y_corner, + x1, + y2 - y_corner, + x1 + x_corner, + y2, + x2 - x_corner, + y2, + x2, + y2 - y_corner, + x2, + y1 + y_corner, + x2 - x_corner, + y1, + x1 + x_corner, + y1, + ] + return Polygon( + deduplicate(points), + strokeColor=strokecolor, + strokeWidth=1, + strokeLineJoin=1, # 1=round + fillColor=color, + **kwargs + ) + + +def draw_polygon( + list_of_points, color=colors.lightgreen, border=None, colour=None, **kwargs +): + """Draw polygon. + + Arguments: + - list_of_point - list of (x,y) tuples for the corner coordinates + - color / colour - The color for the box + + Returns a closed path object, beginning at (x1,y1) going round + the four points in order, and filling with the passed colour. 
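+
+    A minimal sketch (the coordinates are illustrative)::
+
+        triangle = draw_polygon([(10, 10), (50, 90), (90, 10)], colors.lightblue)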
+ + """ + # Let the UK spelling (colour) override the USA spelling (color) + if colour is not None: + color = colour + del colour + + strokecolor, color = _stroke_and_fill_colors(color, border) + + xy_list = [] + for (x, y) in list_of_points: + xy_list.append(x) + xy_list.append(y) + + return Polygon( + deduplicate(xy_list), + strokeColor=strokecolor, + fillColor=color, + strokewidth=0, + **kwargs + ) + + +def draw_arrow( + point1, + point2, + color=colors.lightgreen, + border=None, + shaft_height_ratio=0.4, + head_length_ratio=0.5, + orientation="right", + colour=None, + **kwargs +): + """Draw an arrow. + + Returns a closed path object representing an arrow enclosed by the + box with corners at {point1=(x1,y1), point2=(x2,y2)}, a shaft height + given by shaft_height_ratio (relative to box height), a head length + given by head_length_ratio (also relative to box height), and + an orientation that may be 'left' or 'right'. + """ + x1, y1 = point1 + x2, y2 = point2 + + if shaft_height_ratio < 0 or 1 < shaft_height_ratio: + raise ValueError("Arrow shaft height ratio should be in range 0 to 1") + if head_length_ratio < 0: + raise ValueError("Arrow head length ratio should be positive") + + # Let the UK spelling (colour) override the USA spelling (color) + if colour is not None: + color = colour + del colour + + strokecolor, color = _stroke_and_fill_colors(color, border) + + # Depending on the orientation, we define the bottom left (x1, y1) and + # top right (x2, y2) coordinates differently, but still draw the box + # using the same relative co-ordinates: + xmin, ymin = min(x1, x2), min(y1, y2) + xmax, ymax = max(x1, x2), max(y1, y2) + if orientation == "right": + x1, x2, y1, y2 = xmin, xmax, ymin, ymax + elif orientation == "left": + x1, x2, y1, y2 = xmax, xmin, ymin, ymax + else: + raise ValueError( + "Invalid orientation %r, should be 'left' or 'right'" % orientation + ) + + # We define boxheight and boxwidth accordingly, and calculate the shaft + # height from these. We also ensure that the maximum head length is + # the width of the box enclosure + boxheight = y2 - y1 + boxwidth = x2 - x1 + shaftheight = boxheight * shaft_height_ratio + headlength = min(abs(boxheight) * head_length_ratio, abs(boxwidth)) + if boxwidth < 0: + headlength *= -1 # reverse it + + shafttop = 0.5 * (boxheight + shaftheight) + shaftbase = boxheight - shafttop + headbase = boxwidth - headlength + midheight = 0.5 * boxheight + + points = [ + x1, + y1 + shafttop, + x1 + headbase, + y1 + shafttop, + x1 + headbase, + y2, + x2, + y1 + midheight, + x1 + headbase, + y1, + x1 + headbase, + y1 + shaftbase, + x1, + y1 + shaftbase, + ] + + return Polygon( + deduplicate(points), + strokeColor=strokecolor, + # strokeWidth=max(1, int(boxheight/40.)), + strokeWidth=1, + # default is mitre/miter which can stick out too much: + strokeLineJoin=1, # 1=round + fillColor=color, + **kwargs + ) + + +def deduplicate(points): + """Remove adjacent duplicate points. + + This is important for use with the Polygon class since reportlab has a + bug with duplicate points. + + Arguments: + - points - list of points [x1, y1, x2, y2,...] 
+ + Returns a list in the same format with consecutive duplicates removed + """ + assert len(points) % 2 == 0 + if len(points) < 2: + return points + newpoints = points[0:2] + for x, y in zip(islice(points, 2, None, 2), islice(points, 3, None, 2)): + if x != newpoints[-2] or y != newpoints[-1]: + newpoints.append(x) + newpoints.append(y) + return newpoints + + +def angle2trig(theta): + """Convert angle to a reportlab ready tuple. + + Arguments: + - theta - Angle in degrees, counter clockwise from horizontal + + Returns a representation of the passed angle in a format suitable + for ReportLab rotations (i.e. cos(theta), sin(theta), -sin(theta), + cos(theta) tuple) + """ + c = cos(theta * pi / 180) + s = sin(theta * pi / 180) + return (c, s, -s, c) # Vector for rotating point around an origin + + +def intermediate_points(start, end, graph_data): + """Generate intermediate points describing provided graph data.. + + Returns a list of (start, end, value) tuples describing the passed + graph data as 'bins' between position midpoints. + """ + newdata = [] # data in form (X0, X1, val) + # add first block + newdata.append( + ( + start, + graph_data[0][0] + (graph_data[1][0] - graph_data[0][0]) / 2.0, + graph_data[0][1], + ) + ) + # add middle set + for index in range(1, len(graph_data) - 1): + lastxval, lastyval = graph_data[index - 1] + xval, yval = graph_data[index] + nextxval, nextyval = graph_data[index + 1] + newdata.append( + (lastxval + (xval - lastxval) / 2.0, xval + (nextxval - xval) / 2.0, yval) + ) + # add last block + newdata.append((xval + (nextxval - xval) / 2.0, end, graph_data[-1][1])) + return newdata + + +################################################################################ +# CLASSES +################################################################################ + + +class AbstractDrawer: + """Abstract Drawer. + + Attributes: + - tracklines Boolean for whether to draw lines delineating tracks + - pagesize Tuple describing the size of the page in pixels + - x0 Float X co-ord for leftmost point of drawable area + - xlim Float X co-ord for rightmost point of drawable area + - y0 Float Y co-ord for lowest point of drawable area + - ylim Float Y co-ord for topmost point of drawable area + - pagewidth Float pixel width of drawable area + - pageheight Float pixel height of drawable area + - xcenter Float X co-ord of center of drawable area + - ycenter Float Y co-ord of center of drawable area + - start Int, base to start drawing from + - end Int, base to stop drawing at + - length Size of sequence to be drawn + - cross_track_links List of tuples each with four entries (track A, + feature A, track B, feature B) to be linked. + + """ + + def __init__( + self, + parent, + pagesize="A3", + orientation="landscape", + x=0.05, + y=0.05, + xl=None, + xr=None, + yt=None, + yb=None, + start=None, + end=None, + tracklines=0, + cross_track_links=None, + ): + """Create the object. 
+
+        Arguments:
+         - parent Diagram object containing the data that the drawer draws
+         - pagesize String describing the ISO size of the image, or a tuple
+           of pixels
+         - orientation String describing the required orientation of the
+           final drawing ('landscape' or 'portrait')
+         - x Float (0->1) describing the relative size of the X
+           margins to the page
+         - y Float (0->1) describing the relative size of the Y
+           margins to the page
+         - xl Float (0->1) describing the relative size of the left X
+           margin to the page (overrides x)
+         - xr Float (0->1) describing the relative size of the right X
+           margin to the page (overrides x)
+         - yt Float (0->1) describing the relative size of the top Y
+           margin to the page (overrides y)
+         - yb Float (0->1) describing the relative size of the lower Y
+           margin to the page (overrides y)
+         - start Int, the position to begin drawing the diagram at
+         - end Int, the position to stop drawing the diagram at
+         - tracklines Boolean flag to show (or not) lines delineating tracks
+           on the diagram
+         - cross_track_links List of tuples each with four entries (track A,
+           feature A, track B, feature B) to be linked.
+
+        """
+        self._parent = parent  # The calling Diagram object
+
+        # Perform 'administrative' tasks of setting up the page
+        self.set_page_size(pagesize, orientation)  # Set drawing size
+        self.set_margins(x, y, xl, xr, yt, yb)  # Set page margins
+        self.set_bounds(start, end)  # Set limits on what will be drawn
+        self.tracklines = tracklines  # Set flags
+        if cross_track_links is None:
+            cross_track_links = []
+        self.cross_track_links = cross_track_links
+
+    def set_page_size(self, pagesize, orientation):
+        """Set page size of the drawing.
+
+        Arguments:
+         - pagesize Size of the output image, a tuple of pixels (width,
+           height), or a string in the reportlab.lib.pagesizes
+           set of ISO sizes.
+         - orientation String: 'landscape' or 'portrait'
+
+        """
+        if isinstance(pagesize, str):  # A string, so translate
+            pagesize = page_sizes(pagesize)
+        elif isinstance(pagesize, tuple):  # A tuple, so don't translate
+            pass
+        else:
+            raise ValueError("Page size %s not recognised" % pagesize)
+        shortside, longside = min(pagesize), max(pagesize)
+
+        orientation = orientation.lower()
+        if orientation not in ("landscape", "portrait"):
+            raise ValueError("Orientation %s not recognised" % orientation)
+        if orientation == "landscape":
+            self.pagesize = (longside, shortside)
+        else:
+            self.pagesize = (shortside, longside)
+
+    def set_margins(self, x, y, xl, xr, yt, yb):
+        """Set page margins.
+ + Arguments: + - x Float(0->1), Absolute X margin as % of page + - y Float(0->1), Absolute Y margin as % of page + - xl Float(0->1), Left X margin as % of page + - xr Float(0->1), Right X margin as % of page + - yt Float(0->1), Top Y margin as % of page + - yb Float(0->1), Bottom Y margin as % of page + + Set the page margins as proportions of the page 0->1, and also + set the page limits x0, y0 and xlim, ylim, and page center + xorigin, yorigin, as well as overall page width and height + """ + # Set left, right, top and bottom margins + xmargin_l = xl or x + xmargin_r = xr or x + ymargin_top = yt or y + ymargin_btm = yb or y + + # Set page limits, center and height/width + self.x0, self.y0 = self.pagesize[0] * xmargin_l, self.pagesize[1] * ymargin_btm + self.xlim, self.ylim = ( + self.pagesize[0] * (1 - xmargin_r), + self.pagesize[1] * (1 - ymargin_top), + ) + self.pagewidth = self.xlim - self.x0 + self.pageheight = self.ylim - self.y0 + self.xcenter, self.ycenter = ( + self.x0 + self.pagewidth / 2.0, + self.y0 + self.pageheight / 2.0, + ) + + def set_bounds(self, start, end): + """Set start and end points for the drawing as a whole. + + Arguments: + - start - The first base (or feature mark) to draw from + - end - The last base (or feature mark) to draw to + + """ + low, high = self._parent.range() # Extent of tracks + + if start is not None and end is not None and start > end: + start, end = end, start + + if start is None or start < 0: # Check validity of passed args and + start = 0 # default to 0 + if end is None or end < 0: + end = high + 1 # default to track range top limit + + self.start, self.end = int(start), int(end) + self.length = self.end - self.start + 1 + + def is_in_bounds(self, value): + """Check if given value is within the region selected for drawing. + + Arguments: + - value - A base position + + """ + if value >= self.start and value <= self.end: + return 1 + return 0 + + def __len__(self): + """Return the length of the region to be drawn.""" + return self.length + + def _current_track_start_end(self): + track = self._parent[self.current_track_level] + if track.start is None: + start = self.start + else: + start = max(self.start, track.start) + if track.end is None: + end = self.end + else: + end = min(self.end, track.end) + return start, end diff --git a/code/lib/Bio/Graphics/GenomeDiagram/_CircularDrawer.py b/code/lib/Bio/Graphics/GenomeDiagram/_CircularDrawer.py new file mode 100644 index 0000000..b090fd9 --- /dev/null +++ b/code/lib/Bio/Graphics/GenomeDiagram/_CircularDrawer.py @@ -0,0 +1,1725 @@ +# Copyright 2003-2008 by Leighton Pritchard. All rights reserved. +# Revisions copyright 2008-2017 by Peter Cock. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. 
+#
+# Contact: Leighton Pritchard, The James Hutton Institute,
+# Invergowrie, Dundee, Scotland, DD2 5DA, UK
+# Leighton.Pritchard@hutton.ac.uk
+################################################################################
+
+"""CircularDrawer module for GenomeDiagram."""
+
+# ReportLab imports
+
+from reportlab.graphics.shapes import Drawing, String, Group, Line, Circle, Polygon
+from reportlab.lib import colors
+from reportlab.graphics.shapes import ArcPath
+
+# GenomeDiagram imports
+from ._AbstractDrawer import AbstractDrawer, draw_polygon, intermediate_points
+from ._AbstractDrawer import _stroke_and_fill_colors
+from ._FeatureSet import FeatureSet
+from ._GraphSet import GraphSet
+
+from math import pi, cos, sin
+
+
+class CircularDrawer(AbstractDrawer):
+    """Object for drawing circular diagrams.
+
+    Attributes:
+     - tracklines Boolean for whether to draw lines delineating tracks
+     - pagesize Tuple describing the size of the page in pixels
+     - x0 Float X co-ord for leftmost point of drawable area
+     - xlim Float X co-ord for rightmost point of drawable area
+     - y0 Float Y co-ord for lowest point of drawable area
+     - ylim Float Y co-ord for topmost point of drawable area
+     - pagewidth Float pixel width of drawable area
+     - pageheight Float pixel height of drawable area
+     - xcenter Float X co-ord of center of drawable area
+     - ycenter Float Y co-ord of center of drawable area
+     - start Int, base to start drawing from
+     - end Int, base to stop drawing at
+     - length Size of sequence to be drawn
+     - track_size Float (0->1) the proportion of the track height to draw in
+     - drawing Drawing canvas
+     - drawn_tracks List of ints denoting which tracks are to be drawn
+     - current_track_level Int denoting which track is currently being drawn
+     - track_offsets Dictionary of number of pixels that each track top,
+       center and bottom is offset from the base of a fragment, keyed by track
+     - sweep Float (0->1) the proportion of the circle circumference to
+       use for the diagram
+     - cross_track_links List of tuples each with four entries (track A,
+       feature A, track B, feature B) to be linked.
+
+    """
+
+    def __init__(
+        self,
+        parent=None,
+        pagesize="A3",
+        orientation="landscape",
+        x=0.05,
+        y=0.05,
+        xl=None,
+        xr=None,
+        yt=None,
+        yb=None,
+        start=None,
+        end=None,
+        tracklines=0,
+        track_size=0.75,
+        circular=1,
+        circle_core=0.0,
+        cross_track_links=None,
+    ):
+        """Create CircularDrawer object.
+
+        Arguments:
+         - parent Diagram object containing the data that the drawer
+           draws
+         - pagesize String describing the ISO size of the image, or a tuple
+           of pixels
+         - orientation String describing the required orientation of the
+           final drawing ('landscape' or 'portrait')
+         - x Float (0->1) describing the relative size of the X
+           margins to the page
+         - y Float (0->1) describing the relative size of the Y
+           margins to the page
+         - xl Float (0->1) describing the relative size of the left X
+           margin to the page (overrides x)
+         - xr Float (0->1) describing the relative size of the right X
+           margin to the page (overrides x)
+         - yt Float (0->1) describing the relative size of the top Y
+           margin to the page (overrides y)
+         - yb Float (0->1) describing the relative size of the lower Y
+           margin to the page (overrides y)
+         - start Int, the position to begin drawing the diagram at
+         - end Int, the position to stop drawing the diagram at
+         - tracklines Boolean flag to show (or not) lines delineating tracks
+           on the diagram
+         - track_size The proportion of the available track height that
+           should be taken up in drawing
+         - circular Boolean flag to show whether the passed sequence is
+           circular or not
+         - circle_core The proportion of the available radius to leave
+           empty at the center of a circular diagram (0 to 1).
+         - cross_track_links List of tuples each with four entries (track A,
+           feature A, track B, feature B) to be linked.
+
+        """
+        # Use the superclass' instantiation method
+        AbstractDrawer.__init__(
+            self,
+            parent,
+            pagesize,
+            orientation,
+            x,
+            y,
+            xl,
+            xr,
+            yt,
+            yb,
+            start,
+            end,
+            tracklines,
+            cross_track_links,
+        )
+
+        # Useful measurements on the page
+        self.track_size = track_size
+        self.circle_core = circle_core
+        # Determine proportion of circumference around which information will be drawn
+        if not circular:
+            self.sweep = 0.9
+        else:
+            self.sweep = 1.0
+
+    def set_track_heights(self):
+        """Initialize track heights.
+
+        Since tracks may not be of identical heights, the bottom and top
+        radius for each track is stored in a dictionary - self.track_radii,
+        keyed by track number.
+        """
+        bot_track = min(min(self.drawn_tracks), 1)
+        top_track = max(self.drawn_tracks)  # The 'highest' track to draw
+
+        trackunit_sum = 0  # Total number of 'units' taken up by all tracks
+        trackunits = {}  # Start and end units for each track, keyed by track number
+        heightholder = 0  # placeholder variable
+        for track in range(bot_track, top_track + 1):  # track numbers to 'draw'
+            try:
+                trackheight = self._parent[track].height  # Get track height
+            except Exception:  # TODO: ValueError? IndexError?
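+                # Default to one height unit when the track level does not
+                # provide a usable height attribute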
+ trackheight = 1 + trackunit_sum += trackheight # increment total track unit height + trackunits[track] = (heightholder, heightholder + trackheight) + heightholder += trackheight # move to next height + + max_radius = 0.5 * min(self.pagewidth, self.pageheight) + trackunit_height = max_radius * (1 - self.circle_core) / trackunit_sum + track_core = max_radius * self.circle_core + + # Calculate top and bottom radii for each track + self.track_radii = {} # The inner, outer and center radii for each track + track_crop = ( + trackunit_height * (1 - self.track_size) / 2.0 + ) # 'step back' in pixels + for track in trackunits: + top = trackunits[track][1] * trackunit_height - track_crop + track_core + btm = trackunits[track][0] * trackunit_height + track_crop + track_core + ctr = btm + (top - btm) / 2.0 + self.track_radii[track] = (btm, ctr, top) + + def draw(self): + """Draw a circular diagram of the stored data.""" + # Instantiate the drawing canvas + self.drawing = Drawing(self.pagesize[0], self.pagesize[1]) + + feature_elements = [] # holds feature elements + feature_labels = [] # holds feature labels + greytrack_bgs = [] # holds track background + greytrack_labels = [] # holds track foreground labels + scale_axes = [] # holds scale axes + scale_labels = [] # holds scale axis labels + + # Get tracks to be drawn and set track sizes + self.drawn_tracks = self._parent.get_drawn_levels() + self.set_track_heights() + + # Go through each track in the parent (if it is to be drawn) one by + # one and collate the data as drawing elements + for track_level in self._parent.get_drawn_levels(): + self.current_track_level = track_level + track = self._parent[track_level] + gbgs, glabels = self.draw_greytrack(track) # Greytracks + greytrack_bgs.append(gbgs) + greytrack_labels.append(glabels) + features, flabels = self.draw_track(track) # Features and graphs + feature_elements.append(features) + feature_labels.append(flabels) + if track.scale: + axes, slabels = self.draw_scale(track) # Scale axes + scale_axes.append(axes) + scale_labels.append(slabels) + + feature_cross_links = [] + for cross_link_obj in self.cross_track_links: + cross_link_elements = self.draw_cross_link(cross_link_obj) + if cross_link_elements: + feature_cross_links.append(cross_link_elements) + + # Groups listed in order of addition to page (from back to front) + # Draw track backgrounds + # Draw feature cross track links + # Draw features and graphs + # Draw scale axes + # Draw scale labels + # Draw feature labels + # Draw track labels + element_groups = [ + greytrack_bgs, + feature_cross_links, + feature_elements, + scale_axes, + scale_labels, + feature_labels, + greytrack_labels, + ] + for element_group in element_groups: + for element_list in element_group: + [self.drawing.add(element) for element in element_list] + + if self.tracklines: + # Draw test tracks over top of diagram + self.draw_test_tracks() + + def draw_track(self, track): + """Return list of track elements and list of track labels.""" + track_elements = [] # Holds elements for features and graphs + track_labels = [] # Holds labels for features and graphs + + # Distribution dictionary for dealing with different set types + set_methods = {FeatureSet: self.draw_feature_set, GraphSet: self.draw_graph_set} + + for set in track.get_sets(): # Draw the feature or graph sets + elements, labels = set_methods[set.__class__](set) + track_elements += elements + track_labels += labels + return track_elements, track_labels + + def draw_feature_set(self, set): + """Return list of 
feature elements and list of labels for them.""" + # print('draw feature set') + feature_elements = [] # Holds diagram elements belonging to the features + label_elements = [] # Holds diagram elements belonging to feature labels + + # Collect all the elements for the feature set + for feature in set.get_features(): + if self.is_in_bounds(feature.start) or self.is_in_bounds(feature.end): + features, labels = self.draw_feature(feature) + feature_elements += features + label_elements += labels + + return feature_elements, label_elements + + def draw_feature(self, feature): + """Return list of feature elements and list of labels for them.""" + feature_elements = [] # Holds drawable elements for a single feature + label_elements = [] # Holds labels for a single feature + + if feature.hide: # Don't show feature: return early + return feature_elements, label_elements + + start, end = self._current_track_start_end() + # A single feature may be split into subfeatures, so loop over them + for locstart, locend in feature.locations: + if locend < start: + continue + locstart = max(locstart, start) + if end < locstart: + continue + locend = min(locend, end) + # Get sigil for the feature/ each subfeature + feature_sigil, label = self.get_feature_sigil(feature, locstart, locend) + feature_elements.append(feature_sigil) + if label is not None: # If there's a label + label_elements.append(label) + + return feature_elements, label_elements + + def get_feature_sigil(self, feature, locstart, locend, **kwargs): + """Return graphics for feature, and any required label for it. + + Arguments: + - feature Feature object + - locstart The start position of the feature + - locend The end position of the feature + + """ + # Establish the co-ordinates for the sigil + btm, ctr, top = self.track_radii[self.current_track_level] + + startangle, startcos, startsin = self.canvas_angle(locstart) + endangle, endcos, endsin = self.canvas_angle(locend) + midangle, midcos, midsin = self.canvas_angle(float(locend + locstart) / 2) + + # Distribution dictionary for various ways of drawing the feature + # Each method takes the inner and outer radii, the start and end angle + # subtended at the diagram center, and the color as arguments + draw_methods = { + "BOX": self._draw_sigil_box, + "OCTO": self._draw_sigil_cut_corner_box, + "JAGGY": self._draw_sigil_jaggy, + "ARROW": self._draw_sigil_arrow, + "BIGARROW": self._draw_sigil_big_arrow, + } + + # Get sigil for the feature, location dependent on the feature strand + method = draw_methods[feature.sigil] + kwargs["head_length_ratio"] = feature.arrowhead_length + kwargs["shaft_height_ratio"] = feature.arrowshaft_height + + # Support for clickable links... needs ReportLab 2.4 or later + # which added support for links in SVG output. 
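+        # If the feature defines a url attribute, pass it through to the
+        # sigil as a clickable hyperlink, titled with the feature name.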
+ if hasattr(feature, "url"): + kwargs["hrefURL"] = feature.url + kwargs["hrefTitle"] = feature.name + + sigil = method( + btm, + ctr, + top, + startangle, + endangle, + feature.strand, + color=feature.color, + border=feature.border, + **kwargs + ) + + if feature.label: # Feature needs a label + # The spaces are a hack to force a little space between the label + # and the edge of the feature + label = String( + 0, + 0, + " %s " % feature.name.strip(), + fontName=feature.label_font, + fontSize=feature.label_size, + fillColor=feature.label_color, + ) + labelgroup = Group(label) + if feature.label_strand: + strand = feature.label_strand + else: + strand = feature.strand + if feature.label_position in ("start", "5'", "left"): + # Position the label at the feature's start + if strand != -1: + label_angle = startangle + 0.5 * pi # Make text radial + sinval, cosval = startsin, startcos + else: + label_angle = endangle + 0.5 * pi # Make text radial + sinval, cosval = endsin, endcos + elif feature.label_position in ("middle", "center", "centre"): + # Position the label at the feature's midpoint + label_angle = midangle + 0.5 * pi # Make text radial + sinval, cosval = midsin, midcos + elif feature.label_position in ("end", "3'", "right"): + # Position the label at the feature's end + if strand != -1: + label_angle = endangle + 0.5 * pi # Make text radial + sinval, cosval = endsin, endcos + else: + label_angle = startangle + 0.5 * pi # Make text radial + sinval, cosval = startsin, startcos + elif startangle < pi: + # Default to placing the label the bottom of the feature + # as drawn on the page, meaning feature end on left half + label_angle = endangle + 0.5 * pi # Make text radial + sinval, cosval = endsin, endcos + else: + # Default to placing the label on the bottom of the feature, + # which means the feature end when on right hand half + label_angle = startangle + 0.5 * pi # Make text radial + sinval, cosval = startsin, startcos + if strand != -1: + # Feature label on top + radius = top + if startangle < pi: # Turn text round + label_angle -= pi + else: + labelgroup.contents[0].textAnchor = "end" + else: + # Feature label on bottom + radius = btm + if startangle < pi: # Turn text round and anchor end + label_angle -= pi + labelgroup.contents[0].textAnchor = "end" + x_pos = self.xcenter + radius * sinval + y_pos = self.ycenter + radius * cosval + coslabel = cos(label_angle) + sinlabel = sin(label_angle) + labelgroup.transform = ( + coslabel, + -sinlabel, + sinlabel, + coslabel, + x_pos, + y_pos, + ) + else: + # No label required + labelgroup = None + # if locstart > locend: + # print(locstart, locend, feature.strand, sigil, feature.name) + # print(locstart, locend, feature.name) + return sigil, labelgroup + + def draw_cross_link(self, cross_link): + """Draw a cross-link between features.""" + startA = cross_link.startA + startB = cross_link.startB + endA = cross_link.endA + endB = cross_link.endB + + if not self.is_in_bounds(startA) and not self.is_in_bounds(endA): + return None + if not self.is_in_bounds(startB) and not self.is_in_bounds(endB): + return None + + if startA < self.start: + startA = self.start + if startB < self.start: + startB = self.start + if self.end < endA: + endA = self.end + if self.end < endB: + endB = self.end + + trackobjA = cross_link._trackA(list(self._parent.tracks.values())) + trackobjB = cross_link._trackB(list(self._parent.tracks.values())) + assert trackobjA is not None + assert trackobjB is not None + if trackobjA == trackobjB: + raise NotImplementedError() + + if 
trackobjA.start is not None: + if endA < trackobjA.start: + return + startA = max(startA, trackobjA.start) + if trackobjA.end is not None: + if trackobjA.end < startA: + return + endA = min(endA, trackobjA.end) + if trackobjB.start is not None: + if endB < trackobjB.start: + return + startB = max(startB, trackobjB.start) + if trackobjB.end is not None: + if trackobjB.end < startB: + return + endB = min(endB, trackobjB.end) + + for track_level in self._parent.get_drawn_levels(): + track = self._parent[track_level] + if track == trackobjA: + trackA = track_level + if track == trackobjB: + trackB = track_level + if trackA == trackB: + raise NotImplementedError() + + startangleA, startcosA, startsinA = self.canvas_angle(startA) + startangleB, startcosB, startsinB = self.canvas_angle(startB) + endangleA, endcosA, endsinA = self.canvas_angle(endA) + endangleB, endcosB, endsinB = self.canvas_angle(endB) + + btmA, ctrA, topA = self.track_radii[trackA] + btmB, ctrB, topB = self.track_radii[trackB] + + if ctrA < ctrB: + return [ + self._draw_arc_poly( + topA, + btmB, + startangleA, + endangleA, + startangleB, + endangleB, + cross_link.color, + cross_link.border, + cross_link.flip, + ) + ] + else: + return [ + self._draw_arc_poly( + btmA, + topB, + startangleA, + endangleA, + startangleB, + endangleB, + cross_link.color, + cross_link.border, + cross_link.flip, + ) + ] + + def draw_graph_set(self, set): + """Return list of graph elements and list of their labels. + + Arguments: + - set GraphSet object + + """ + # print('draw graph set') + elements = [] # Holds graph elements + + # Distribution dictionary for how to draw the graph + style_methods = { + "line": self.draw_line_graph, + "heat": self.draw_heat_graph, + "bar": self.draw_bar_graph, + } + + for graph in set.get_graphs(): + elements += style_methods[graph.style](graph) + + return elements, [] + + def draw_line_graph(self, graph): + """Return line graph as list of drawable elements. 
+ + Arguments: + - graph GraphData object + + """ + line_elements = [] # holds drawable elements + + # Get graph data + data_quartiles = graph.quartiles() + minval, maxval = data_quartiles[0], data_quartiles[4] + btm, ctr, top = self.track_radii[self.current_track_level] + trackheight = 0.5 * (top - btm) + datarange = maxval - minval + if datarange == 0: + datarange = trackheight + + start, end = self._current_track_start_end() + data = graph[start:end] + + if not data: + return [] + + # midval is the value at which the x-axis is plotted, and is the + # central ring in the track + if graph.center is None: + midval = (maxval + minval) / 2.0 + else: + midval = graph.center + # Whichever is the greatest difference: max-midval or min-midval, is + # taken to specify the number of pixel units resolved along the + # y-axis + resolution = max((midval - minval), (maxval - midval)) + + # Start from first data point + pos, val = data[0] + lastangle, lastcos, lastsin = self.canvas_angle(pos) + # We calculate the track height + posheight = trackheight * (val - midval) / resolution + ctr + lastx = self.xcenter + posheight * lastsin # start xy coords + lasty = self.ycenter + posheight * lastcos + for pos, val in data: + posangle, poscos, possin = self.canvas_angle(pos) + posheight = trackheight * (val - midval) / resolution + ctr + x = self.xcenter + posheight * possin # next xy coords + y = self.ycenter + posheight * poscos + line_elements.append( + Line( + lastx, + lasty, + x, + y, + strokeColor=graph.poscolor, + strokeWidth=graph.linewidth, + ) + ) + lastx, lasty, = x, y + return line_elements + + def draw_bar_graph(self, graph): + """Return list of drawable elements for a bar graph. + + Arguments: + - graph Graph object + + """ + # At each point contained in the graph data, we draw a vertical bar + # from the track center to the height of the datapoint value (positive + # values go up in one color, negative go down in the alternative + # color). 
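+        # Bar height is trackheight * (val - midval) / resolution, so a
+        # value equal to midval gives a zero-height bar at the center ring.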
+ bar_elements = [] + + # Set the number of pixels per unit for the data + data_quartiles = graph.quartiles() + minval, maxval = data_quartiles[0], data_quartiles[4] + btm, ctr, top = self.track_radii[self.current_track_level] + trackheight = 0.5 * (top - btm) + datarange = maxval - minval + if datarange == 0: + datarange = trackheight + data = graph[self.start : self.end] + # midval is the value at which the x-axis is plotted, and is the + # central ring in the track + if graph.center is None: + midval = (maxval + minval) / 2.0 + else: + midval = graph.center + + # Convert data into 'binned' blocks, covering half the distance to the + # next data point on either side, accounting for the ends of fragments + # and tracks + start, end = self._current_track_start_end() + data = intermediate_points(start, end, graph[start:end]) + + if not data: + return [] + + # Whichever is the greatest difference: max-midval or min-midval, is + # taken to specify the number of pixel units resolved along the + # y-axis + resolution = max((midval - minval), (maxval - midval)) + if resolution == 0: + resolution = trackheight + + # Create elements for the bar graph based on newdata + for pos0, pos1, val in data: + pos0angle, pos0cos, pos0sin = self.canvas_angle(pos0) + pos1angle, pos1cos, pos1sin = self.canvas_angle(pos1) + + barval = trackheight * (val - midval) / resolution + if barval >= 0: + barcolor = graph.poscolor + else: + barcolor = graph.negcolor + + # Draw bar + bar_elements.append( + self._draw_arc(ctr, ctr + barval, pos0angle, pos1angle, barcolor) + ) + return bar_elements + + def draw_heat_graph(self, graph): + """Return list of drawable elements for the heat graph. + + Arguments: + - graph Graph object + + """ + # At each point contained in the graph data, we draw a box that is the + # full height of the track, extending from the midpoint between the + # previous and current data points to the midpoint between the current + # and next data points + heat_elements = [] # holds drawable elements + + # Get graph data + data_quartiles = graph.quartiles() + minval, maxval = data_quartiles[0], data_quartiles[4] + midval = (maxval + minval) / 2.0 # mid is the value at the X-axis + btm, ctr, top = self.track_radii[self.current_track_level] + trackheight = top - btm + + start, end = self._current_track_start_end() + data = intermediate_points(start, end, graph[start:end]) + + # Create elements on the graph, indicating a large positive value by + # the graph's poscolor, and a large negative value by the graph's + # negcolor attributes + for pos0, pos1, val in data: + pos0angle, pos0cos, pos0sin = self.canvas_angle(pos0) + pos1angle, pos1cos, pos1sin = self.canvas_angle(pos1) + + # Calculate the heat color, based on the differential between + # the value and the median value + heat = colors.linearlyInterpolatedColor( + graph.poscolor, graph.negcolor, maxval, minval, val + ) + + # Draw heat box + heat_elements.append( + self._draw_arc(btm, top, pos0angle, pos1angle, heat, border=heat) + ) + return heat_elements + + def draw_scale(self, track): + """Return list of elements in the scale and list of their labels. 
+ + Arguments: + - track Track object + + """ + scale_elements = [] # holds axes and ticks + scale_labels = [] # holds labels + + if not track.scale: + # no scale required, exit early + return [], [] + + # Get track locations + btm, ctr, top = self.track_radii[self.current_track_level] + trackheight = top - ctr + + # X-axis + start, end = self._current_track_start_end() + if track.start is not None or track.end is not None: + # Draw an arc, leaving out the wedge + p = ArcPath(strokeColor=track.scale_color, fillColor=None) + startangle, startcos, startsin = self.canvas_angle(start) + endangle, endcos, endsin = self.canvas_angle(end) + p.addArc( + self.xcenter, + self.ycenter, + ctr, + 90 - (endangle * 180 / pi), + 90 - (startangle * 180 / pi), + ) + scale_elements.append(p) + del p + # Y-axis start marker + x0, y0 = self.xcenter + btm * startsin, self.ycenter + btm * startcos + x1, y1 = self.xcenter + top * startsin, self.ycenter + top * startcos + scale_elements.append(Line(x0, y0, x1, y1, strokeColor=track.scale_color)) + # Y-axis end marker + x0, y0 = self.xcenter + btm * endsin, self.ycenter + btm * endcos + x1, y1 = self.xcenter + top * endsin, self.ycenter + top * endcos + scale_elements.append(Line(x0, y0, x1, y1, strokeColor=track.scale_color)) + elif self.sweep < 1: + # Draw an arc, leaving out the wedge + p = ArcPath(strokeColor=track.scale_color, fillColor=None) + # Note reportlab counts angles anti-clockwise from the horizontal + # (as in mathematics, e.g. complex numbers and polar coordinates) + # in degrees. + p.addArc( + self.xcenter, + self.ycenter, + ctr, + startangledegrees=90 - 360 * self.sweep, + endangledegrees=90, + ) + scale_elements.append(p) + del p + # Y-axis start marker + x0, y0 = self.xcenter, self.ycenter + btm + x1, y1 = self.xcenter, self.ycenter + top + scale_elements.append(Line(x0, y0, x1, y1, strokeColor=track.scale_color)) + # Y-axis end marker + alpha = 2 * pi * self.sweep + x0, y0 = self.xcenter + btm * sin(alpha), self.ycenter + btm * cos(alpha) + x1, y1 = self.xcenter + top * sin(alpha), self.ycenter + top * cos(alpha) + scale_elements.append(Line(x0, y0, x1, y1, strokeColor=track.scale_color)) + else: + # Draw a full circle + scale_elements.append( + Circle( + self.xcenter, + self.ycenter, + ctr, + strokeColor=track.scale_color, + fillColor=None, + ) + ) + + start, end = self._current_track_start_end() + if track.scale_ticks: # Ticks are required on the scale + # Draw large ticks + # I want the ticks to be consistently positioned relative to + # the start of the sequence (position 0), not relative to the + # current viewpoint (self.start and self.end) + + ticklen = track.scale_largeticks * trackheight + tickiterval = int(track.scale_largetick_interval) + # Note that we could just start the list of ticks using + # range(0,self.end,tickinterval) and the filter out the + # ones before self.start - but this seems wasteful. + # Using tickiterval * (self.start/tickiterval) is a shortcut. 
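+            # For example, self.start == 2500 with tickiterval == 1000 makes
+            # the loop start at 2000; ticks outside the drawn region are
+            # then skipped by the bounds check inside the loop.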
+ for tickpos in range( + tickiterval * (self.start // tickiterval), int(self.end), tickiterval + ): + if tickpos <= start or end <= tickpos: + continue + tick, label = self.draw_tick( + tickpos, ctr, ticklen, track, track.scale_largetick_labels + ) + scale_elements.append(tick) + if label is not None: # If there's a label, add it + scale_labels.append(label) + # Draw small ticks + ticklen = track.scale_smallticks * trackheight + tickiterval = int(track.scale_smalltick_interval) + for tickpos in range( + tickiterval * (self.start // tickiterval), int(self.end), tickiterval + ): + if tickpos <= start or end <= tickpos: + continue + tick, label = self.draw_tick( + tickpos, ctr, ticklen, track, track.scale_smalltick_labels + ) + scale_elements.append(tick) + if label is not None: # If there's a label, add it + scale_labels.append(label) + + # Check to see if the track contains a graph - if it does, get the + # minimum and maximum values, and put them on the scale Y-axis + # at 60 degree intervals, ordering the labels by graph_id + startangle, startcos, startsin = self.canvas_angle(start) + endangle, endcos, endsin = self.canvas_angle(end) + if track.axis_labels: + for set in track.get_sets(): + if set.__class__ is GraphSet: + # Y-axis + for n in range(7): + angle = n * 1.0471975511965976 + if angle < startangle or endangle < angle: + continue + ticksin, tickcos = sin(angle), cos(angle) + x0, y0 = ( + self.xcenter + btm * ticksin, + self.ycenter + btm * tickcos, + ) + x1, y1 = ( + self.xcenter + top * ticksin, + self.ycenter + top * tickcos, + ) + scale_elements.append( + Line(x0, y0, x1, y1, strokeColor=track.scale_color) + ) + + graph_label_min = [] + graph_label_max = [] + graph_label_mid = [] + for graph in set.get_graphs(): + quartiles = graph.quartiles() + minval, maxval = quartiles[0], quartiles[4] + if graph.center is None: + midval = (maxval + minval) / 2.0 + graph_label_min.append("%.3f" % minval) + graph_label_max.append("%.3f" % maxval) + graph_label_mid.append("%.3f" % midval) + else: + diff = max( + (graph.center - minval), (maxval - graph.center) + ) + minval = graph.center - diff + maxval = graph.center + diff + midval = graph.center + graph_label_mid.append("%.3f" % midval) + graph_label_min.append("%.3f" % minval) + graph_label_max.append("%.3f" % maxval) + xmid, ymid = (x0 + x1) / 2.0, (y0 + y1) / 2.0 + for limit, x, y in [ + (graph_label_min, x0, y0), + (graph_label_max, x1, y1), + (graph_label_mid, xmid, ymid), + ]: + label = String( + 0, + 0, + ";".join(limit), + fontName=track.scale_font, + fontSize=track.scale_fontsize, + fillColor=track.scale_color, + ) + label.textAnchor = "middle" + labelgroup = Group(label) + labelgroup.transform = ( + tickcos, + -ticksin, + ticksin, + tickcos, + x, + y, + ) + scale_labels.append(labelgroup) + + return scale_elements, scale_labels + + def draw_tick(self, tickpos, ctr, ticklen, track, draw_label): + """Return drawing element for a tick on the scale. + + Arguments: + - tickpos Int, position of the tick on the sequence + - ctr Float, Y co-ord of the center of the track + - ticklen How long to draw the tick + - track Track, the track the tick is drawn on + - draw_label Boolean, write the tick label? 
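+
+        Returns a (tick, label) tuple; label is None when draw_label is
+        false.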
+
+        """
+        # Calculate tick co-ordinates
+        tickangle, tickcos, ticksin = self.canvas_angle(tickpos)
+        x0, y0 = self.xcenter + ctr * ticksin, self.ycenter + ctr * tickcos
+        x1, y1 = (
+            self.xcenter + (ctr + ticklen) * ticksin,
+            self.ycenter + (ctr + ticklen) * tickcos,
+        )
+        # Calculate height of text label so it can be offset on lower half
+        # of diagram
+        # LP: not used, as not all fonts have ascent_descent data in reportlab.pdfbase._fontdata
+        # label_offset = _fontdata.ascent_descent[track.scale_font][0]*\
+        #               track.scale_fontsize/1000.
+        tick = Line(x0, y0, x1, y1, strokeColor=track.scale_color)
+        if draw_label:
+            # Put tick position on as label
+            if track.scale_format == "SInt":
+                if tickpos >= 1000000:
+                    tickstring = str(tickpos // 1000000) + " Mbp"
+                elif tickpos >= 1000:
+                    tickstring = str(tickpos // 1000) + " Kbp"
+                else:
+                    tickstring = str(tickpos)
+            else:
+                tickstring = str(tickpos)
+            label = String(
+                0,
+                0,
+                tickstring,  # Make label string
+                fontName=track.scale_font,
+                fontSize=track.scale_fontsize,
+                fillColor=track.scale_color,
+            )
+            if tickangle > pi:
+                label.textAnchor = "end"
+            # LP: This label_offset depends on ascent_descent data, which is not available for all
+            # fonts, so has been deprecated.
+            # if 0.5*pi < tickangle < 1.5*pi:
+            #     y1 -= label_offset
+            labelgroup = Group(label)
+            labelgroup.transform = (1, 0, 0, 1, x1, y1)
+        else:
+            labelgroup = None
+        return tick, labelgroup
+
+    def draw_test_tracks(self):
+        """Draw blue test tracks with green line down their center."""
+        # Add lines only for drawn tracks
+        for track in self.drawn_tracks:
+            btm, ctr, top = self.track_radii[track]
+            self.drawing.add(
+                Circle(
+                    self.xcenter,
+                    self.ycenter,
+                    top,
+                    strokeColor=colors.blue,
+                    fillColor=None,
+                )
+            )  # top line
+            self.drawing.add(
+                Circle(
+                    self.xcenter,
+                    self.ycenter,
+                    ctr,
+                    strokeColor=colors.green,
+                    fillColor=None,
+                )
+            )  # middle line
+            self.drawing.add(
+                Circle(
+                    self.xcenter,
+                    self.ycenter,
+                    btm,
+                    strokeColor=colors.blue,
+                    fillColor=None,
+                )
+            )  # bottom line
+
+    def draw_greytrack(self, track):
+        """Drawing element for grey background to passed Track object."""
+        greytrack_bgs = []  # Holds track backgrounds
+        greytrack_labels = []  # Holds track foreground labels
+
+        if not track.greytrack:  # No greytrack required, return early
+            return [], []
+
+        # Get track location
+        btm, ctr, top = self.track_radii[self.current_track_level]
+
+        start, end = self._current_track_start_end()
+        startangle, startcos, startsin = self.canvas_angle(start)
+        endangle, endcos, endsin = self.canvas_angle(end)
+
+        # Make background
+        if track.start is not None or track.end is not None:
+            # Draw an arc, leaving out the wedge
+            greytrack_bgs.append(
+                self._draw_arc(
+                    btm, top, startangle, endangle, colors.Color(0.96, 0.96, 0.96)
+                )
+            )
+        elif self.sweep < 1:
+            # Make a partial circle, a large arc box
+            # This method assumes the correct center for us.
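+            # e.g. a sweep of 0.9 leaves a 10% wedge of the circle open;
+            # the arc below then spans 0 to 2*pi*0.9 radians, measured
+            # clockwise from the vertical.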
+ greytrack_bgs.append( + self._draw_arc( + btm, top, 0, 2 * pi * self.sweep, colors.Color(0.96, 0.96, 0.96) + ) + ) + else: + # Make a full circle (using a VERY thick linewidth) + greytrack_bgs.append( + Circle( + self.xcenter, + self.ycenter, + ctr, + strokeColor=colors.Color(0.96, 0.96, 0.96), + fillColor=None, + strokeWidth=top - btm, + ) + ) + + if track.greytrack_labels: + # Labels are required for this track + labelstep = self.length // track.greytrack_labels # label interval + for pos in range(self.start, self.end, labelstep): + label = String( + 0, + 0, + track.name, # Add a new label at + fontName=track.greytrack_font, # each interval + fontSize=track.greytrack_fontsize, + fillColor=track.greytrack_fontcolor, + ) + theta, costheta, sintheta = self.canvas_angle(pos) + if theta < startangle or endangle < theta: + continue + x, y = ( + self.xcenter + btm * sintheta, + self.ycenter + btm * costheta, + ) # start text halfway up marker + labelgroup = Group(label) + labelangle = ( + self.sweep * 2 * pi * (pos - self.start) / self.length - pi / 2 + ) + if theta > pi: + label.textAnchor = "end" # Anchor end of text to inner radius + labelangle += pi # and reorient it + cosA, sinA = cos(labelangle), sin(labelangle) + labelgroup.transform = (cosA, -sinA, sinA, cosA, x, y) + if not self.length - x <= labelstep: # Don't overrun the circle + greytrack_labels.append(labelgroup) + + return greytrack_bgs, greytrack_labels + + def canvas_angle(self, base): + """Given base-pair position, return (angle, cosine, sin) (PRIVATE).""" + angle = self.sweep * 2 * pi * (base - self.start) / self.length + return (angle, cos(angle), sin(angle)) + + def _draw_sigil_box( + self, bottom, center, top, startangle, endangle, strand, **kwargs + ): + """Draw BOX sigil (PRIVATE).""" + if strand == 1: + inner_radius = center + outer_radius = top + elif strand == -1: + inner_radius = bottom + outer_radius = center + else: + inner_radius = bottom + outer_radius = top + return self._draw_arc( + inner_radius, outer_radius, startangle, endangle, **kwargs + ) + + def _draw_arc( + self, + inner_radius, + outer_radius, + startangle, + endangle, + color, + border=None, + colour=None, + **kwargs + ): + """Return closed path describing an arc box (PRIVATE). + + Arguments: + - inner_radius Float distance of inside of arc from drawing center + - outer_radius Float distance of outside of arc from drawing center + - startangle Float angle subtended by start of arc at drawing center + (in radians) + - endangle Float angle subtended by end of arc at drawing center + (in radians) + - color colors.Color object for arc (overridden by backwards + compatible argument with UK spelling, colour). + + Returns a closed path object describing an arced box corresponding to + the passed values. For very small angles, a simple four sided + polygon is used. + """ + # Let the UK spelling (colour) override the USA spelling (color) + if colour is not None: + color = colour + + strokecolor, color = _stroke_and_fill_colors(color, border) + + if abs(float(endangle - startangle)) > 0.01: + # Wide arc, must use full curves + p = ArcPath(strokeColor=strokecolor, fillColor=color, strokewidth=0) + # Note reportlab counts angles anti-clockwise from the horizontal + # (as in mathematics, e.g. complex numbers and polar coordinates) + # but we use clockwise from the vertical. Also reportlab uses + # degrees, but we use radians. 
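+            # Worked example: a clockwise-from-vertical angle of pi/2
+            # (3 o'clock) maps to 90 - (pi/2 * 180/pi) = 0 degrees in
+            # reportlab's anti-clockwise-from-horizontal convention.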
+            p.addArc(
+                self.xcenter,
+                self.ycenter,
+                inner_radius,
+                90 - (endangle * 180 / pi),
+                90 - (startangle * 180 / pi),
+                moveTo=True,
+            )
+            p.addArc(
+                self.xcenter,
+                self.ycenter,
+                outer_radius,
+                90 - (endangle * 180 / pi),
+                90 - (startangle * 180 / pi),
+                reverse=True,
+            )
+            p.closePath()
+            return p
+        else:
+            # Cheat and just use a four sided polygon.
+            # Calculate trig values for angle and coordinates
+            startcos, startsin = cos(startangle), sin(startangle)
+            endcos, endsin = cos(endangle), sin(endangle)
+            x0, y0 = self.xcenter, self.ycenter  # origin of the circle
+            x1, y1 = (x0 + inner_radius * startsin, y0 + inner_radius * startcos)
+            x2, y2 = (x0 + inner_radius * endsin, y0 + inner_radius * endcos)
+            x3, y3 = (x0 + outer_radius * endsin, y0 + outer_radius * endcos)
+            x4, y4 = (x0 + outer_radius * startsin, y0 + outer_radius * startcos)
+            return draw_polygon([(x1, y1), (x2, y2), (x3, y3), (x4, y4)], color, border)
+
+    def _draw_arc_line(
+        self, path, start_radius, end_radius, start_angle, end_angle, move=False
+    ):
+        """Add a list of points to a path object (PRIVATE).
+
+        Assumes angles given are in degrees!
+
+        Represents what would be a straight line on a linear diagram.
+        """
+        x0, y0 = self.xcenter, self.ycenter  # origin of the circle
+        radius_diff = end_radius - start_radius
+        angle_diff = end_angle - start_angle
+        dx = 0.01  # heuristic
+        a = start_angle * pi / 180
+        if move:
+            path.moveTo(x0 + start_radius * cos(a), y0 + start_radius * sin(a))
+        else:
+            path.lineTo(x0 + start_radius * cos(a), y0 + start_radius * sin(a))
+        x = dx
+        if 0.01 <= abs(dx):
+            while x < 1:
+                r = start_radius + x * radius_diff
+                a = (
+                    (start_angle + x * (angle_diff)) * pi / 180
+                )  # to radians for sin/cos
+                # print(x0+r*cos(a), y0+r*sin(a))
+                path.lineTo(x0 + r * cos(a), y0 + r * sin(a))
+                x += dx
+        a = end_angle * pi / 180
+        path.lineTo(x0 + end_radius * cos(a), y0 + end_radius * sin(a))
+
+    def _draw_arc_poly(
+        self,
+        inner_radius,
+        outer_radius,
+        inner_startangle,
+        inner_endangle,
+        outer_startangle,
+        outer_endangle,
+        color,
+        border=None,
+        flip=False,
+        **kwargs
+    ):
+        """Return polygon path describing an arc (PRIVATE)."""
+        strokecolor, color = _stroke_and_fill_colors(color, border)
+
+        x0, y0 = self.xcenter, self.ycenter  # origin of the circle
+        if (
+            abs(inner_endangle - outer_startangle) > 0.01
+            or abs(outer_endangle - inner_startangle) > 0.01
+            or abs(inner_startangle - outer_startangle) > 0.01
+            or abs(inner_endangle - outer_endangle) > 0.01
+        ):
+            # Wide arc, must use full curves
+            p = ArcPath(
+                strokeColor=strokecolor,
+                fillColor=color,
+                # default is mitre/miter which can stick out too much:
+                strokeLineJoin=1,  # 1=round
+                strokewidth=0,
+            )
+            # Note reportlab counts angles anti-clockwise from the horizontal
+            # (as in mathematics, e.g. complex numbers and polar coordinates)
+            # but we use clockwise from the vertical. Also reportlab uses
+            # degrees, but we use radians.
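+            # The four conversions below apply the same mapping,
+            # degrees_reportlab = 90 - radians_ours * 180 / pi, to each
+            # corner angle of the polygon.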
+ i_start = 90 - (inner_startangle * 180 / pi) + i_end = 90 - (inner_endangle * 180 / pi) + o_start = 90 - (outer_startangle * 180 / pi) + o_end = 90 - (outer_endangle * 180 / pi) + p.addArc(x0, y0, inner_radius, i_end, i_start, moveTo=True, reverse=True) + if flip: + # Flipped, join end to start, + self._draw_arc_line(p, inner_radius, outer_radius, i_end, o_start) + p.addArc(x0, y0, outer_radius, o_end, o_start, reverse=True) + self._draw_arc_line(p, outer_radius, inner_radius, o_end, i_start) + else: + # Not flipped, join start to start, end to end + self._draw_arc_line(p, inner_radius, outer_radius, i_end, o_end) + p.addArc(x0, y0, outer_radius, o_end, o_start, reverse=False) + self._draw_arc_line(p, outer_radius, inner_radius, o_start, i_start) + p.closePath() + return p + else: + # Cheat and just use a four sided polygon. + # Calculate trig values for angle and coordinates + inner_startcos, inner_startsin = ( + cos(inner_startangle), + sin(inner_startangle), + ) + inner_endcos, inner_endsin = cos(inner_endangle), sin(inner_endangle) + outer_startcos, outer_startsin = ( + cos(outer_startangle), + sin(outer_startangle), + ) + outer_endcos, outer_endsin = cos(outer_endangle), sin(outer_endangle) + x1, y1 = ( + x0 + inner_radius * inner_startsin, + y0 + inner_radius * inner_startcos, + ) + x2, y2 = ( + x0 + inner_radius * inner_endsin, + y0 + inner_radius * inner_endcos, + ) + x3, y3 = ( + x0 + outer_radius * outer_endsin, + y0 + outer_radius * outer_endcos, + ) + x4, y4 = ( + x0 + outer_radius * outer_startsin, + y0 + outer_radius * outer_startcos, + ) + return draw_polygon( + [(x1, y1), (x2, y2), (x3, y3), (x4, y4)], + color, + border, + # default is mitre/miter which can stick out too much: + strokeLineJoin=1, # 1=round + ) + + def _draw_sigil_cut_corner_box( + self, + bottom, + center, + top, + startangle, + endangle, + strand, + color, + border=None, + corner=0.5, + **kwargs + ): + """Draw OCTO sigil, box with corners cut off (PRIVATE).""" + if strand == 1: + inner_radius = center + outer_radius = top + elif strand == -1: + inner_radius = bottom + outer_radius = center + else: + inner_radius = bottom + outer_radius = top + + strokecolor, color = _stroke_and_fill_colors(color, border) + + startangle, endangle = min(startangle, endangle), max(startangle, endangle) + angle = float(endangle - startangle) + + middle_radius = 0.5 * (inner_radius + outer_radius) + boxheight = outer_radius - inner_radius + + corner_len = min(0.5 * boxheight, 0.5 * boxheight * corner) + shaft_inner_radius = inner_radius + corner_len + shaft_outer_radius = outer_radius - corner_len + + cornerangle_delta = max( + 0.0, min(abs(boxheight) * 0.5 * corner / middle_radius, abs(angle * 0.5)) + ) + if angle < 0: + cornerangle_delta *= -1 # reverse it + + # Calculate trig values for angle and coordinates + startcos, startsin = cos(startangle), sin(startangle) + endcos, endsin = cos(endangle), sin(endangle) + x0, y0 = self.xcenter, self.ycenter # origin of the circle + p = ArcPath( + strokeColor=strokecolor, + fillColor=color, + strokeLineJoin=1, # 1=round + strokewidth=0, + **kwargs + ) + # Inner curved edge + p.addArc( + self.xcenter, + self.ycenter, + inner_radius, + 90 - ((endangle - cornerangle_delta) * 180 / pi), + 90 - ((startangle + cornerangle_delta) * 180 / pi), + moveTo=True, + ) + # Corner edge - straight lines assumes small angle! + # TODO - Use self._draw_arc_line(p, ...) 
here if we expose corner setting + p.lineTo(x0 + shaft_inner_radius * startsin, y0 + shaft_inner_radius * startcos) + p.lineTo(x0 + shaft_outer_radius * startsin, y0 + shaft_outer_radius * startcos) + # Outer curved edge + p.addArc( + self.xcenter, + self.ycenter, + outer_radius, + 90 - ((endangle - cornerangle_delta) * 180 / pi), + 90 - ((startangle + cornerangle_delta) * 180 / pi), + reverse=True, + ) + # Corner edges + p.lineTo(x0 + shaft_outer_radius * endsin, y0 + shaft_outer_radius * endcos) + p.lineTo(x0 + shaft_inner_radius * endsin, y0 + shaft_inner_radius * endcos) + p.closePath() + return p + + def _draw_sigil_arrow( + self, bottom, center, top, startangle, endangle, strand, **kwargs + ): + """Draw ARROW sigil (PRIVATE).""" + if strand == 1: + inner_radius = center + outer_radius = top + orientation = "right" + elif strand == -1: + inner_radius = bottom + outer_radius = center + orientation = "left" + else: + inner_radius = bottom + outer_radius = top + orientation = "right" # backwards compatibility + return self._draw_arc_arrow( + inner_radius, + outer_radius, + startangle, + endangle, + orientation=orientation, + **kwargs + ) + + def _draw_sigil_big_arrow( + self, bottom, center, top, startangle, endangle, strand, **kwargs + ): + """Draw BIGARROW sigil, like ARROW but straddles the axis (PRIVATE).""" + if strand == -1: + orientation = "left" + else: + orientation = "right" + return self._draw_arc_arrow( + bottom, top, startangle, endangle, orientation=orientation, **kwargs + ) + + def _draw_arc_arrow( + self, + inner_radius, + outer_radius, + startangle, + endangle, + color, + border=None, + shaft_height_ratio=0.4, + head_length_ratio=0.5, + orientation="right", + colour=None, + **kwargs + ): + """Draw an arrow along an arc (PRIVATE).""" + # Let the UK spelling (colour) override the USA spelling (color) + if colour is not None: + color = colour + + strokecolor, color = _stroke_and_fill_colors(color, border) + + # if orientation == 'right': + # startangle, endangle = min(startangle, endangle), max(startangle, endangle) + # elif orientation == 'left': + # startangle, endangle = max(startangle, endangle), min(startangle, endangle) + # else: + startangle, endangle = min(startangle, endangle), max(startangle, endangle) + if orientation != "left" and orientation != "right": + raise ValueError( + "Invalid orientation %r, should be 'left' or 'right'" % orientation + ) + + angle = float(endangle - startangle) # angle subtended by arc + middle_radius = 0.5 * (inner_radius + outer_radius) + boxheight = outer_radius - inner_radius + shaft_height = boxheight * shaft_height_ratio + shaft_inner_radius = middle_radius - 0.5 * shaft_height + shaft_outer_radius = middle_radius + 0.5 * shaft_height + headangle_delta = max( + 0.0, min(abs(boxheight) * head_length_ratio / middle_radius, abs(angle)) + ) + if angle < 0: + headangle_delta *= -1 # reverse it + if orientation == "right": + headangle = endangle - headangle_delta + else: + headangle = startangle + headangle_delta + if startangle <= endangle: + headangle = max(min(headangle, endangle), startangle) + else: + headangle = max(min(headangle, startangle), endangle) + if not ( + startangle <= headangle <= endangle or endangle <= headangle <= startangle + ): + raise RuntimeError( + "Problem drawing arrow, invalid positions. 
" + "Start angle: %s, Head angle: %s, " + "End angle: %s, Angle: %s" % (startangle, headangle, endangle, angle) + ) + + # Calculate trig values for angle and coordinates + startcos, startsin = cos(startangle), sin(startangle) + headcos, headsin = cos(headangle), sin(headangle) + endcos, endsin = cos(endangle), sin(endangle) + x0, y0 = self.xcenter, self.ycenter # origin of the circle + if 0.5 >= abs(angle) and abs(headangle_delta) >= abs(angle): + # If the angle is small, and the arrow is all head, + # cheat and just use a triangle. + if orientation == "right": + x1, y1 = (x0 + inner_radius * startsin, y0 + inner_radius * startcos) + x2, y2 = (x0 + outer_radius * startsin, y0 + outer_radius * startcos) + x3, y3 = (x0 + middle_radius * endsin, y0 + middle_radius * endcos) + else: + x1, y1 = (x0 + inner_radius * endsin, y0 + inner_radius * endcos) + x2, y2 = (x0 + outer_radius * endsin, y0 + outer_radius * endcos) + x3, y3 = (x0 + middle_radius * startsin, y0 + middle_radius * startcos) + # return draw_polygon([(x1,y1),(x2,y2),(x3,y3)], color, border, + # stroke_line_join=1) + return Polygon( + [x1, y1, x2, y2, x3, y3], + strokeColor=border or color, + fillColor=color, + strokeLineJoin=1, # 1=round, not mitre! + strokewidth=0, + ) + elif orientation == "right": + p = ArcPath( + strokeColor=strokecolor, + fillColor=color, + # default is mitre/miter which can stick out too much: + strokeLineJoin=1, # 1=round + strokewidth=0, + **kwargs + ) + # Note reportlab counts angles anti-clockwise from the horizontal + # (as in mathematics, e.g. complex numbers and polar coordinates) + # but we use clockwise from the vertical. Also reportlab uses + # degrees, but we use radians. + p.addArc( + self.xcenter, + self.ycenter, + shaft_inner_radius, + 90 - (headangle * 180 / pi), + 90 - (startangle * 180 / pi), + moveTo=True, + ) + p.addArc( + self.xcenter, + self.ycenter, + shaft_outer_radius, + 90 - (headangle * 180 / pi), + 90 - (startangle * 180 / pi), + reverse=True, + ) + if abs(angle) < 0.5: + p.lineTo(x0 + outer_radius * headsin, y0 + outer_radius * headcos) + p.lineTo(x0 + middle_radius * endsin, y0 + middle_radius * endcos) + p.lineTo(x0 + inner_radius * headsin, y0 + inner_radius * headcos) + else: + self._draw_arc_line( + p, + outer_radius, + middle_radius, + 90 - (headangle * 180 / pi), + 90 - (endangle * 180 / pi), + ) + self._draw_arc_line( + p, + middle_radius, + inner_radius, + 90 - (endangle * 180 / pi), + 90 - (headangle * 180 / pi), + ) + p.closePath() + return p + else: + p = ArcPath( + strokeColor=strokecolor, + fillColor=color, + # default is mitre/miter which can stick out too much: + strokeLineJoin=1, # 1=round + strokewidth=0, + **kwargs + ) + # Note reportlab counts angles anti-clockwise from the horizontal + # (as in mathematics, e.g. complex numbers and polar coordinates) + # but we use clockwise from the vertical. Also reportlab uses + # degrees, but we use radians. 
+            p.addArc(
+                self.xcenter,
+                self.ycenter,
+                shaft_inner_radius,
+                90 - (endangle * 180 / pi),
+                90 - (headangle * 180 / pi),
+                moveTo=True,
+                reverse=True,
+            )
+            p.addArc(
+                self.xcenter,
+                self.ycenter,
+                shaft_outer_radius,
+                90 - (endangle * 180 / pi),
+                90 - (headangle * 180 / pi),
+                reverse=False,
+            )
+            # Note - two straight lines are only a good approximation for a
+            # small head angle; in general curved lines are needed here:
+            if abs(angle) < 0.5:
+                p.lineTo(x0 + outer_radius * headsin, y0 + outer_radius * headcos)
+                p.lineTo(x0 + middle_radius * startsin, y0 + middle_radius * startcos)
+                p.lineTo(x0 + inner_radius * headsin, y0 + inner_radius * headcos)
+            else:
+                self._draw_arc_line(
+                    p,
+                    outer_radius,
+                    middle_radius,
+                    90 - (headangle * 180 / pi),
+                    90 - (startangle * 180 / pi),
+                )
+                self._draw_arc_line(
+                    p,
+                    middle_radius,
+                    inner_radius,
+                    90 - (startangle * 180 / pi),
+                    90 - (headangle * 180 / pi),
+                )
+            p.closePath()
+            return p
+
+    def _draw_sigil_jaggy(
+        self,
+        bottom,
+        center,
+        top,
+        startangle,
+        endangle,
+        strand,
+        color,
+        border=None,
+        **kwargs
+    ):
+        """Draw JAGGY sigil (PRIVATE).
+
+        Although we may in future expose the head/tail jaggy lengths, for now
+        both the left and right edges are drawn jagged.
+        """
+        if strand == 1:
+            inner_radius = center
+            outer_radius = top
+            teeth = 2
+        elif strand == -1:
+            inner_radius = bottom
+            outer_radius = center
+            teeth = 2
+        else:
+            inner_radius = bottom
+            outer_radius = top
+            teeth = 4
+
+        # TODO, expose these settings?
+        tail_length_ratio = 1.0
+        head_length_ratio = 1.0
+
+        strokecolor, color = _stroke_and_fill_colors(color, border)
+
+        startangle, endangle = min(startangle, endangle), max(startangle, endangle)
+        angle = float(endangle - startangle)  # angle subtended by arc
+        height = outer_radius - inner_radius
+
+        assert startangle <= endangle and angle >= 0
+        if head_length_ratio and tail_length_ratio:
+            headangle = max(
+                endangle
+                - min(height * head_length_ratio / (center * teeth), angle * 0.5),
+                startangle,
+            )
+            tailangle = min(
+                startangle
+                + min(height * tail_length_ratio / (center * teeth), angle * 0.5),
+                endangle,
+            )
+            # With very small features, floating point calculations can
+            # violate the assertion below that start <= tail <= head <= end
+            tailangle = min(tailangle, headangle)
+        elif head_length_ratio:
+            headangle = max(
+                endangle - min(height * head_length_ratio / (center * teeth), angle),
+                startangle,
+            )
+            tailangle = startangle
+        else:
+            headangle = endangle
+            tailangle = min(
+                startangle + min(height * tail_length_ratio / (center * teeth), angle),
+                endangle,
+            )
+
+        if not startangle <= tailangle <= headangle <= endangle:
+            raise RuntimeError(
+                "Problem drawing jaggy sigil, invalid "
+                "positions. Start angle: %s, "
+                "Tail angle: %s, Head angle: %s, End angle: %s, "
+                "Angle: %s" % (startangle, tailangle, headangle, endangle, angle)
+            )
+
+        # Calculate trig values for angle and coordinates
+        startcos, startsin = cos(startangle), sin(startangle)
+        headcos, headsin = cos(headangle), sin(headangle)
+        endcos, endsin = cos(endangle), sin(endangle)
+        x0, y0 = self.xcenter, self.ycenter  # origin of the circle
+
+        p = ArcPath(
+            strokeColor=strokecolor,
+            fillColor=color,
+            # default is mitre/miter which can stick out too much:
+            strokeLineJoin=1,  # 1=round
+            strokewidth=0,
+            **kwargs
+        )
+        # Note reportlab counts angles anti-clockwise from the horizontal
+        # (as in mathematics, e.g. complex numbers and polar coordinates)
+        # but we use clockwise from the vertical. 
Also reportlab uses + # degrees, but we use radians. + p.addArc( + self.xcenter, + self.ycenter, + inner_radius, + 90 - (headangle * 180 / pi), + 90 - (tailangle * 180 / pi), + moveTo=True, + ) + for i in range(0, teeth): + p.addArc( + self.xcenter, + self.ycenter, + inner_radius + i * height / teeth, + 90 - (tailangle * 180 / pi), + 90 - (startangle * 180 / pi), + ) + # Curved line needed when drawing long jaggies + self._draw_arc_line( + p, + inner_radius + i * height / teeth, + inner_radius + (i + 1) * height / teeth, + 90 - (startangle * 180 / pi), + 90 - (tailangle * 180 / pi), + ) + p.addArc( + self.xcenter, + self.ycenter, + outer_radius, + 90 - (headangle * 180 / pi), + 90 - (tailangle * 180 / pi), + reverse=True, + ) + for i in range(0, teeth): + p.addArc( + self.xcenter, + self.ycenter, + outer_radius - i * height / teeth, + 90 - (endangle * 180 / pi), + 90 - (headangle * 180 / pi), + reverse=True, + ) + # Curved line needed when drawing long jaggies + self._draw_arc_line( + p, + outer_radius - i * height / teeth, + outer_radius - (i + 1) * height / teeth, + 90 - (endangle * 180 / pi), + 90 - (headangle * 180 / pi), + ) + p.closePath() + return p diff --git a/code/lib/Bio/Graphics/GenomeDiagram/_Colors.py b/code/lib/Bio/Graphics/GenomeDiagram/_Colors.py new file mode 100644 index 0000000..a37e107 --- /dev/null +++ b/code/lib/Bio/Graphics/GenomeDiagram/_Colors.py @@ -0,0 +1,234 @@ +# Copyright 2003-2008 by Leighton Pritchard. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +# +# Contact: Leighton Pritchard, The James Hutton Institute, +# Invergowrie, Dundee, Scotland, DD2 5DA, UK +# Leighton.Pritchard@hutton.ac.uk +################################################################################ + +"""Colors module. + +Provides: + +- ColorTranslator - class to convert tuples of integers and floats into + colors.Color objects + +For drawing capabilities, this module uses reportlab to define colors: +http://www.reportlab.com +""" + +# ReportLab imports +from reportlab.lib import colors + + +class ColorTranslator: + """Class providing methods for translating representations of color into. 
+ + Examples + -------- + >>> from Bio.Graphics import GenomeDiagram + >>> gdct=GenomeDiagram._Colors.ColorTranslator() + >>> print(gdct.float1_color((0.5, 0.5, 0.5))) + Color(.5,.5,.5,1) + >>> print(gdct.int255_color((1, 75, 240))) + Color(.003922,.294118,.941176,1) + >>> print(gdct.artemis_color(7)) + Color(1,1,0,1) + >>> print(gdct.scheme_color(2)) + Color(1,0,0,1) + >>> gdct.get_artemis_colorscheme() + {0: (Color(1,1,1,1), 'pathogenicity, adaptation, chaperones'), 1: (Color(.39,.39,.39,1), 'energy metabolism'), 2: (Color(1,0,0,1), 'information transfer'), 3: (Color(0,1,0,1), 'surface'), 4: (Color(0,0,1,1), 'stable RNA'), 5: (Color(0,1,1,1), 'degradation of large molecules'), 6: (Color(1,0,1,1), 'degradation of small molecules'), 7: (Color(1,1,0,1), 'central/intermediary/miscellaneous metabolism'), 8: (Color(.6,.98,.6,1), 'unknown'), 9: (Color(.53,.81,.98,1), 'regulators'), 10: (Color(1,.65,0,1), 'conserved hypotheticals'), 11: (Color(.78,.59,.39,1), 'pseudogenes and partial genes'), 12: (Color(1,.78,.78,1), 'phage/IS elements'), 13: (Color(.7,.7,.7,1), 'some miscellaneous information'), 14: (Color(0,0,0,1), ''), 15: (Color(1,.25,.25,1), 'secondary metabolism'), 16: (Color(1,.5,.5,1), ''), 17: (Color(1,.75,.75,1), '')} + + >>> print(gdct.translate((0.5, 0.5, 0.5))) + Color(.5,.5,.5,1) + >>> print(gdct.translate((1, 75, 240))) + Color(.003922,.294118,.941176,1) + >>> print(gdct.translate(7)) + Color(1,1,0,1) + >>> print(gdct.translate(2)) + Color(1,0,0,1) + + """ + + def __init__(self, filename=None): + """Initialize. + + Argument filename is the location of a file containing + colorscheme information. + """ + self._artemis_colorscheme = { + 0: (colors.Color(1, 1, 1), "pathogenicity, adaptation, chaperones"), + 1: (colors.Color(0.39, 0.39, 0.39), "energy metabolism"), + 2: (colors.Color(1, 0, 0), "information transfer"), + 3: (colors.Color(0, 1, 0), "surface"), + 4: (colors.Color(0, 0, 1), "stable RNA"), + 5: (colors.Color(0, 1, 1), "degradation of large molecules"), + 6: (colors.Color(1, 0, 1), "degradation of small molecules"), + 7: (colors.Color(1, 1, 0), "central/intermediary/miscellaneous metabolism"), + 8: (colors.Color(0.60, 0.98, 0.60), "unknown"), + 9: (colors.Color(0.53, 0.81, 0.98), "regulators"), + 10: (colors.Color(1, 0.65, 0), "conserved hypotheticals"), + 11: (colors.Color(0.78, 0.59, 0.39), "pseudogenes and partial genes"), + 12: (colors.Color(1, 0.78, 0.78), "phage/IS elements"), + 13: (colors.Color(0.70, 0.70, 0.70), "some miscellaneous information"), + 14: (colors.Color(0, 0, 0), ""), + 15: (colors.Color(1, 0.25, 0.25), "secondary metabolism"), + 16: (colors.Color(1, 0.5, 0.5), ""), + 17: (colors.Color(1, 0.75, 0.75), ""), + } # Hardwired Artemis color scheme + self._colorscheme = {} + if filename is not None: + self.read_colorscheme(filename) # Imported color scheme + else: + self._colorscheme = self._artemis_colorscheme + + def translate(self, color=None, colour=None): + """Translate a color into a ReportLab Color object. + + Arguments: + - color - Color defined as an int, a tuple of three ints 0->255 + or a tuple of three floats 0 -> 1, or a string giving + one of the named colors defined by ReportLab, or a + ReportLab color object (returned as is). + - colour - Backwards compatible alias using UK spelling (which + will over-ride any color argument). 
+
+        Returns a colors.Color object, determined semi-intelligently
+        depending on the input values
+        """
+        # Let the UK spelling (colour) override the USA spelling (color)
+        if colour is not None:
+            color = colour
+
+        if color is None:
+            raise ValueError("Passed color (or colour) must be a valid color type")
+        elif isinstance(color, int):
+            color = self.scheme_color(color)
+        elif isinstance(color, colors.Color):
+            return color
+        elif isinstance(color, str):
+            # Assume it's a named reportlab color like "red".
+            color = colors.toColor(color)
+        elif isinstance(color, tuple) and isinstance(color[0], float):
+            color = self.float1_color(color)
+        elif isinstance(color, tuple) and isinstance(color[0], int):
+            color = self.int255_color(color)
+        return color
+
+    def read_colorscheme(self, filename):
+        r"""Load colour scheme from file.
+
+        Reads information from a file containing color information and stores
+        it internally.
+
+        Argument filename is the location of a file defining colors in
+        tab-separated format plaintext as::
+
+            INT \t RED \t GREEN \t BLUE \t Comment
+
+        Where RED, GREEN and BLUE are intensities in the range 0 -> 255, e.g.::
+
+            2 \t 255 \t 0 \t 0 \t Red: Information transfer
+
+        """
+        with open(filename) as lines:
+            for line in lines:
+                data = line.strip().split("\t")
+                try:
+                    label = int(data[0])
+                    red, green, blue = int(data[1]), int(data[2]), int(data[3])
+                    if len(data) > 4:
+                        comment = data[4]
+                    else:
+                        comment = ""
+                    self._colorscheme[label] = (
+                        self.int255_color((red, green, blue)),
+                        comment,
+                    )
+                except ValueError:
+                    raise ValueError(
+                        "Expected INT \t INT \t INT \t INT \t string input"
+                    ) from None
+
+    def get_artemis_colorscheme(self):
+        """Return the Artemis color scheme as a dictionary."""
+        return self._artemis_colorscheme
+
+    def artemis_color(self, value):
+        """Artemis color (integer) to ReportLab Color object.
+
+        Arguments:
+         - value: An int representing a functional class in the Artemis
+           color scheme (see www.sanger.ac.uk for a description),
+           or a string from a GenBank feature annotation for the
+           color which may be dot delimited (in which case the
+           first value is used).
+
+        Takes an int representing a functional class in the Artemis color
+        scheme, and returns the appropriate colors.Color object
+        """
+        try:
+            value = int(value)
+        except ValueError:
+            if value.count("."):  # dot-delimited
+                value = int(value.split(".", 1)[0])  # Use only first integer
+            else:
+                raise
+        if value in self._artemis_colorscheme:
+            return self._artemis_colorscheme[value][0]
+        else:
+            raise ValueError("Artemis color out of range: %d" % value)
+
+    def get_colorscheme(self):
+        """Return the user-defined color scheme as a dictionary."""
+        return self._colorscheme
+
+    def scheme_color(self, value):
+        """Map a user-defined color integer to a ReportLab Color object.
+
+         - value: An int representing a single color in the user-defined
+           color scheme
+
+        Takes an int representing a user-defined color and returns the
+        appropriate colors.Color object.
+        """
+        if value in self._colorscheme:
+            return self._colorscheme[value][0]
+        else:
+            raise ValueError("Scheme color out of range: %d" % value)
+
+    def int255_color(self, values):
+        """Map integer (red, green, blue) tuple to a ReportLab Color object.
+
+         - values: A tuple of (red, green, blue) intensities as
+           integers in the range 0->255
+
+        Takes a tuple of (red, green, blue) intensity values in the range
+        0 -> 255 and returns an appropriate colors.Color object.
+        """
+        red, green, blue = values
+        factor = 1 / 255.0
+        red, green, blue = red * factor, green * factor, blue * factor
+        return colors.Color(red, green, blue)
+
+    def float1_color(self, values):
+        """Map float (red, green, blue) tuple to a ReportLab Color object.
+
+         - values: A tuple of (red, green, blue) intensities as floats
+           in the range 0 -> 1
+
+        Takes a tuple of (red, green, blue) intensity values in the range
+        0 -> 1 and returns an appropriate colors.Color object.
+        """
+        red, green, blue = values
+        return colors.Color(red, green, blue)
+
+
+if __name__ == "__main__":
+    from Bio._utils import run_doctest
+
+    run_doctest(verbose=2)
diff --git a/code/lib/Bio/Graphics/GenomeDiagram/_CrossLink.py b/code/lib/Bio/Graphics/GenomeDiagram/_CrossLink.py
new file mode 100644
index 0000000..7958de4
--- /dev/null
+++ b/code/lib/Bio/Graphics/GenomeDiagram/_CrossLink.py
@@ -0,0 +1,100 @@
+# Copyright 2011-2017 by Peter Cock. All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Genome Diagram Feature cross-link module."""
+
+from reportlab.lib import colors
+
+
+class CrossLink:
+    """Hold information for drawing a cross link between features."""
+
+    def __init__(
+        self, featureA, featureB, color=colors.lightgreen, border=None, flip=False
+    ):
+        """Create a new cross link.
+
+        Arguments featureA and featureB should be GenomeDiagram feature
+        objects, or 3-tuples (track object, start, end), and currently must
+        be on different tracks.
+
+        The color and border arguments should be ReportLab colour objects, or
+        for border use a boolean False for no border, otherwise it defaults to
+        the same as the main colour.
+
+        The flip argument draws an inverted cross link, useful for showing a
+        mapping where one sequence has been reversed. It is conventional to
+        also use a different colour (e.g. red for simple links, blue for any
+        flipped links).
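+
+        For example, CrossLink(featureA, featureB, color=colors.blue,
+        flip=True) would draw an inverted link in blue, following the
+        convention above.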
+ """ + # Initialize attributes + self.featureA = featureA + self.featureB = featureB + self.color = color # default color to draw the feature + self.border = border + self.flip = flip + + @property + def startA(self): + """Start position of Feature A.""" + try: + return self.featureA.start + except AttributeError: + track, start, end = self.featureA + return start + + @property + def endA(self): + """End position of Feature A.""" + try: + return self.featureA.end + except AttributeError: + track, start, end = self.featureA + return end + + def _trackA(self, tracks): + try: + track, start, end = self.featureA + assert track in tracks + return track + except TypeError: + for track in tracks: + for feature_set in track.get_sets(): + if hasattr(feature_set, "features"): + if self.featureA in feature_set.features.values(): + return track + return None + + @property + def startB(self): + """Start position of Feature B.""" + try: + return self.featureB.start + except AttributeError: + track, start, end = self.featureB + return start + + @property + def endB(self): + """End position of Feature B.""" + try: + return self.featureB.end + except AttributeError: + track, start, end = self.featureB + return end + + def _trackB(self, tracks): + try: + track, start, end = self.featureB + assert track in tracks + return track + except TypeError: + for track in tracks: + for feature_set in track.get_sets(): + if hasattr(feature_set, "features"): + if self.featureB in feature_set.features.values(): + return track + return None diff --git a/code/lib/Bio/Graphics/GenomeDiagram/_Diagram.py b/code/lib/Bio/Graphics/GenomeDiagram/_Diagram.py new file mode 100644 index 0000000..fa44970 --- /dev/null +++ b/code/lib/Bio/Graphics/GenomeDiagram/_Diagram.py @@ -0,0 +1,411 @@ +# Copyright 2003-2008 by Leighton Pritchard. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +# +# Contact: Leighton Pritchard, The James Hutton Institute, +# Invergowrie, Dundee, Scotland, DD2 5DA, UK +# Leighton.Pritchard@hutton.ac.uk +"""Provides a container for information concerning the tracks to be drawn in a diagram. + +It also provides the interface for defining the diagram (possibly split these +functions in later version?). + +For drawing capabilities, this module uses reportlab to draw and write the +diagram: + +http://www.reportlab.com + +For dealing with biological information, the package expects BioPython +objects - namely SeqRecord objects containing SeqFeature objects. +""" + +try: + from reportlab.graphics import renderPM +except ImportError: + # This is an optional part of ReportLab, so may not be installed. + renderPM = None + +from ._LinearDrawer import LinearDrawer +from ._CircularDrawer import CircularDrawer +from ._Track import Track + +from Bio.Graphics import _write + + +def _first_defined(*args): + """Return the first non-null argument (PRIVATE).""" + for arg in args: + if arg is not None: + return arg + return None + + +class Diagram: + """Diagram container. + + Arguments: + - name - a string, identifier for the diagram. + - tracks - a list of Track objects comprising the diagram. + - format - a string, format of the diagram 'circular' or + 'linear', depending on the sort of diagram required. + - pagesize - a string, the pagesize of output describing the ISO + size of the image, or a tuple of pixels. 
+     - orientation - a string describing the required orientation of the
+       final drawing ('landscape' or 'portrait').
+     - x - a float (0->1), the proportion of the page to take
+       up with even X margins to the page.
+     - y - a float (0->1), the proportion of the page to take
+       up with even Y margins to the page.
+     - xl - a float (0->1), the proportion of the page to take
+       up with the left X margin to the page (overrides x).
+     - xr - a float (0->1), the proportion of the page to take
+       up with the right X margin to the page (overrides x).
+     - yt - a float (0->1), the proportion of the page to take
+       up with the top Y margin to the page (overrides y).
+     - yb - a float (0->1), the proportion of the page to take
+       up with the bottom Y margin to the page (overrides y).
+     - circle_core - a float, the proportion of the available radius to
+       leave empty at the center of a circular diagram (0 to 1).
+     - start - an integer, the base/aa position to start the diagram at.
+     - end - an integer, the base/aa position to end the diagram at.
+     - tracklines - a boolean, True if track guidelines are to be drawn.
+     - fragments - an integer, for a linear diagram, the number of equal
+       divisions into which the sequence is divided.
+     - fragment_size - a float (0->1), the proportion of the space
+       available to each fragment that should be used in drawing.
+     - track_size - a float (0->1), the proportion of the space
+       available to each track that should be used in drawing with sigils.
+     - circular - a boolean, True if the genome/sequence to be drawn
+       is, in reality, circular.
+
+    """
+
+    def __init__(
+        self,
+        name=None,
+        format="circular",
+        pagesize="A3",
+        orientation="landscape",
+        x=0.05,
+        y=0.05,
+        xl=None,
+        xr=None,
+        yt=None,
+        yb=None,
+        start=None,
+        end=None,
+        tracklines=False,
+        fragments=10,
+        fragment_size=None,
+        track_size=0.75,
+        circular=True,
+        circle_core=0.0,
+    ):
+        """Initialize.
+
+        gdd = Diagram(name=None)
+        """
+        self.tracks = {}  # Holds all Track objects, keyed by level
+        self.name = name  # Description of the diagram
+        # Diagram page setup attributes
+        self.format = format
+        self.pagesize = pagesize
+        self.orientation = orientation
+        self.x = x
+        self.y = y
+        self.xl = xl
+        self.xr = xr
+        self.yt = yt
+        self.yb = yb
+        self.start = start
+        self.end = end
+        self.tracklines = tracklines
+        self.fragments = fragments
+        if fragment_size is not None:
+            self.fragment_size = fragment_size
+        else:
+            if self.fragments == 1:
+                # For single fragments, default to full height
+                self.fragment_size = 1
+            else:
+                # Otherwise keep a 10% gap between fragments
+                self.fragment_size = 0.9
+        self.track_size = track_size
+        self.circular = circular
+        self.circle_core = circle_core
+        self.cross_track_links = []
+        self.drawing = None
+
+    def set_all_tracks(self, attr, value):
+        """Set the passed attribute of all tracks in the set to the passed value.
+
+        Arguments:
+         - attr - An attribute of the Track class.
+         - value - The value to set that attribute.
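+
+        For example, set_all_tracks("greytrack", 1) would switch on the
+        grey background for every track in the diagram.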
+ + set_all_tracks(self, attr, value) + """ + for track in self.tracks.values(): + if hasattr(track, attr): + # If the feature has the attribute set it to the passed value + setattr(track, attr, value) + + def draw( + self, + format=None, + pagesize=None, + orientation=None, + x=None, + y=None, + xl=None, + xr=None, + yt=None, + yb=None, + start=None, + end=None, + tracklines=None, + fragments=None, + fragment_size=None, + track_size=None, + circular=None, + circle_core=None, + cross_track_links=None, + ): + """Draw the diagram, with passed parameters overriding existing attributes. + + gdd.draw(format='circular') + """ + # Pass the parameters to the drawer objects that will build the + # diagrams. At the moment, we detect overrides with an or in the + # Instantiation arguments, but I suspect there's a neater way to do + # this. + if format == "linear": + drawer = LinearDrawer( + self, + _first_defined(pagesize, self.pagesize), + _first_defined(orientation, self.orientation), + _first_defined(x, self.x), + _first_defined(y, self.y), + _first_defined(xl, self.xl), + _first_defined(xr, self.xr), + _first_defined(yt, self.yt), + _first_defined(yb, self.yb), + _first_defined(start, self.start), + _first_defined(end, self.end), + _first_defined(tracklines, self.tracklines), + _first_defined(fragments, self.fragments), + _first_defined(fragment_size, self.fragment_size), + _first_defined(track_size, self.track_size), + _first_defined(cross_track_links, self.cross_track_links), + ) + else: + drawer = CircularDrawer( + self, + _first_defined(pagesize, self.pagesize), + _first_defined(orientation, self.orientation), + _first_defined(x, self.x), + _first_defined(y, self.y), + _first_defined(xl, self.xl), + _first_defined(xr, self.xr), + _first_defined(yt, self.yt), + _first_defined(yb, self.yb), + _first_defined(start, self.start), + _first_defined(end, self.end), + _first_defined(tracklines, self.tracklines), + _first_defined(track_size, self.track_size), + _first_defined(circular, self.circular), + _first_defined(circle_core, self.circle_core), + _first_defined(cross_track_links, self.cross_track_links), + ) + drawer.draw() # Tell the drawer to complete the drawing + self.drawing = drawer.drawing # Get the completed drawing + + def write(self, filename="test1.ps", output="PS", dpi=72): + """Write the drawn diagram to a specified file, in a specified format. + + Arguments: + - filename - a string indicating the name of the output file, + or a handle to write to. + - output - a string indicating output format, one of PS, PDF, + SVG, or provided the ReportLab renderPM module is installed, one + of the bitmap formats JPG, BMP, GIF, PNG, TIFF or TIFF. The + format can be given in upper or lower case. + - dpi - an integer. Resolution (dots per inch) for bitmap formats. + + Returns: + No return value. + + write(self, filename='test1.ps', output='PS', dpi=72) + + """ + return _write(self.drawing, filename, output, dpi=dpi) + + def write_to_string(self, output="PS", dpi=72): + """Return a byte string containing the diagram in the requested format. + + Arguments: + - output - a string indicating output format, one of PS, PDF, + SVG, JPG, BMP, GIF, PNG, TIFF or TIFF (as specified for the write + method). + - dpi - Resolution (dots per inch) for bitmap formats. + + Returns: + Return the completed drawing as a bytes string in a prescribed + format. + + """ + # The ReportLab drawToString method, which this function used to call, + # originally just used a StringIO handle with the drawToFile method. 
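+        # Using a BytesIO buffer lets self.write() target an in-memory
+        # stream, so file output and string output share one code path.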
+ # + # TODO - Rename this method to include keyword bytes? + from io import BytesIO + + handle = BytesIO() + self.write(handle, output, dpi) + return handle.getvalue() + + def add_track(self, track, track_level): + """Add a Track object to the diagram. + + It also accepts instructions to place it at a particular level on the + diagram. + + Arguments: + - track - Track object to draw. + - track_level - an integer. The level at which the track will be + drawn (above an arbitrary baseline). + + add_track(self, track, track_level) + """ + if track is None: + raise ValueError("Must specify track") + if track_level not in self.tracks: # No track at that level + self.tracks[track_level] = track # so just add it + else: # Already a track there, so shunt all higher tracks up one + occupied_levels = sorted( + self.get_levels() + ) # Get list of occupied levels... + occupied_levels.reverse() # ...reverse it (highest first) + for val in occupied_levels: + # If track value >= that to be added + if val >= track.track_level: + self.tracks[val + 1] = self.tracks[val] # ...increment by 1 + self.tracks[track_level] = track # And put the new track in + self.tracks[track_level].track_level = track_level + + def new_track(self, track_level, **args): + """Add a new Track to the diagram at a given level. + + The track is returned for further user manipulation. + + Arguments: + - track_level - an integer. The level at which the track will be + drawn (above an arbitrary baseline). + + new_track(self, track_level) + """ + newtrack = Track() + for key in args: + setattr(newtrack, key, args[key]) + if track_level not in self.tracks: # No track at that level + self.tracks[track_level] = newtrack # so just add it + else: # Already a track there, so shunt all higher tracks up one + occupied_levels = sorted( + self.get_levels() + ) # Get list of occupied levels... + occupied_levels.reverse() # ...reverse (highest first)... + for val in occupied_levels: + if val >= track_level: + # Track value >= that to be added, increment by 1 + self.tracks[val + 1] = self.tracks[val] + self.tracks[track_level] = newtrack # And put the new track in + self.tracks[track_level].track_level = track_level + return newtrack + + def del_track(self, track_level): + """Remove the track to be drawn at a particular level on the diagram. + + Arguments: + - track_level - an integer. The level of the track on the diagram + to delete. + + del_track(self, track_level) + """ + del self.tracks[track_level] + + def get_tracks(self): + """Return a list of the tracks contained in the diagram.""" + return list(self.tracks.values()) + + def move_track(self, from_level, to_level): + """Move a track from one level on the diagram to another. + + Arguments: + - from_level - an integer. The level at which the track to be + moved is found. + - to_level - an integer. The level to move the track to. + + """ + aux = self.tracks[from_level] + del self.tracks[from_level] + self.add_track(aux, to_level) + + def renumber_tracks(self, low=1, step=1): + """Renumber all tracks consecutively. + + Optionally from a passed lowest number. + + Arguments: + - low - an integer. The track number to start from. + - step - an integer. The track interval for separation of + tracks. + + """ + track = low # Start numbering from here + levels = self.get_levels() + + conversion = {} # Holds new set of levels + for level in levels: # Starting at low... 
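+            # e.g. with low=1, step=1, existing levels {3: A, 7: B}
+            # become {1: A, 2: B}.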
+ conversion[track] = self.tracks[level] # Add old tracks to new set + conversion[track].track_level = track + track += step # step interval + self.tracks = conversion # Replace old set of levels with new set + + def get_levels(self): + """Return a sorted list of levels occupied by tracks in the diagram.""" + return sorted(self.tracks) + + def get_drawn_levels(self): + """Return a sorted list of levels occupied by tracks. + + These tracks are not explicitly hidden. + """ + return sorted(key for key in self.tracks if not self.tracks[key].hide) + + def range(self): + """Return lowest and highest base numbers from track features. + + Returned type is a tuple. + """ + lows, highs = [], [] + for track in self.tracks.values(): # Get ranges for each track + low, high = track.range() + lows.append(low) + highs.append(high) + return min(lows), max(highs) # Return extremes from all tracks + + def __getitem__(self, key): + """Return the track contained at the level of the passed key.""" + return self.tracks[key] + + def __str__(self): + """Return a formatted string describing the diagram.""" + outstr = ["\n<%s: %s>" % (self.__class__, self.name)] + outstr.append("%d tracks" % len(self.tracks)) + for level in self.get_levels(): + outstr.append("Track %d: %s\n" % (level, self.tracks[level])) + outstr = "\n".join(outstr) + return outstr diff --git a/code/lib/Bio/Graphics/GenomeDiagram/_Feature.py b/code/lib/Bio/Graphics/GenomeDiagram/_Feature.py new file mode 100644 index 0000000..87be16e --- /dev/null +++ b/code/lib/Bio/Graphics/GenomeDiagram/_Feature.py @@ -0,0 +1,198 @@ +# Copyright 2003-2008 by Leighton Pritchard. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +# +# Contact: Leighton Pritchard, The James Hutton Institute, +# Invergowrie, Dundee, Scotland, DD2 5DA, UK +# Leighton.Pritchard@hutton.ac.uk +################################################################################ + +"""Feature module. + +Provides: + - Feature - class to wrap Bio.SeqFeature objects with drawing information + +For drawing capabilities, this module uses reportlab to define colors: +http://www.reportlab.com +""" + +# ReportLab imports +from reportlab.lib import colors + +# GenomeDiagram imports +from ._Colors import ColorTranslator + + +class Feature: + """Class to wrap Bio.SeqFeature objects for GenomeDiagram. + + Attributes: + - parent FeatureSet, container for the object + - id Unique id + - color color.Color, color to draw the feature + - hide Boolean for whether the feature will be drawn or not + - sigil String denoting the type of sigil to use for the feature. + Currently either "BOX" or "ARROW" are supported. + - arrowhead_length Float denoting length of the arrow head to be drawn, + relative to the bounding box height. The arrow shaft + takes up the remainder of the bounding box's length. + - arrowshaft_height Float denoting length of the representative arrow + shaft to be drawn, relative to the bounding box height. + The arrow head takes the full height of the bound box. 
+     - name_qualifiers  List of Strings, describes the qualifiers that may
+       contain feature names in the wrapped Bio.SeqFeature object
+     - label    Boolean, 1 if the label should be shown
+     - label_font    String describing the font to use for the feature label
+     - label_size    Int describing the feature label font size
+     - label_color   color.Color describing the feature label color
+     - label_angle   Float describing the angle through which to rotate the
+       feature label in degrees (default = 45, linear only)
+     - label_position    String, 'start', 'end' or 'middle' denoting where
+       to place the feature label. Leave as None for the default
+       which is 'start' for linear diagrams, and at the bottom of
+       the feature as drawn on circular diagrams.
+     - label_strand  Integer -1 or +1 to explicitly place the label on the
+       forward or reverse strand. Default (None) follows the
+       feature's strand. Use -1 to put labels under (linear) or
+       inside (circular) the track, +1 to put them above (linear)
+       or outside (circular) the track.
+     - locations     List of tuples of (start, end) ints describing where the
+       feature and any subfeatures start and end
+     - type      String denoting the feature type
+     - name      String denoting the feature name
+     - strand    Int describing the strand on which the feature is found
+
+    """
+
+    def __init__(
+        self,
+        parent=None,
+        feature_id=None,
+        feature=None,
+        color=colors.lightgreen,
+        label=0,
+        border=None,
+        colour=None,
+    ):
+        """Initialize.
+
+        Arguments:
+         - parent    FeatureSet containing the feature
+         - feature_id    Unique id for the feature
+         - feature   Bio.SeqFeature object to be wrapped
+         - color    color.Color Color to draw the feature (overridden
+           by backwards compatible argument with UK spelling, colour).
+           Either argument is overridden if 'color' is found in feature
+           qualifiers
+         - border   color.Color Color to draw the feature border, use
+           None for the same as the fill color, False for no border.
+ - label Boolean, 1 if the label should be shown + + """ + # Let the UK spelling (colour) override the USA spelling (color) + if colour is not None: + color = colour + + self._colortranslator = ColorTranslator() + + # Initialize attributes + self.parent = parent + self.id = feature_id + self.color = color # default color to draw the feature + self.border = border + self._feature = None # Bio.SeqFeature object to wrap + self.hide = 0 # show by default + self.sigil = "BOX" + self.arrowhead_length = 0.5 # 50% of the box height + self.arrowshaft_height = 0.4 # 40% of the box height + self.name_qualifiers = ["gene", "label", "name", "locus_tag", "product"] + self.label = label + self.label_font = "Helvetica" + self.label_size = 6 + self.label_color = colors.black + self.label_angle = 45 + self.label_position = None # Expect 'start', 'middle', or 'end' (plus aliases) + self.label_strand = None # Expect +1 or -1 if overriding this + + if feature is not None: + self.set_feature(feature) + + def set_feature(self, feature): + """Define the Bio.SeqFeature object to be wrapped.""" + self._feature = feature + self.__process_feature() + + def __process_feature(self): + """Examine wrapped feature and set some properties accordingly (PRIVATE).""" + self.locations = [] + bounds = [] + # This will be a list of length one for simple FeatureLocation: + for location in self._feature.location.parts: + start = location.nofuzzy_start + end = location.nofuzzy_end + # if start > end and self.strand == -1: + # start, end = end, start + self.locations.append((start, end)) + bounds += [start, end] + self.type = str(self._feature.type) # Feature type + # TODO - Strand can vary with subfeatures (e.g. mixed strand tRNA) + if self._feature.strand is None: + # This is the SeqFeature default (None), but the drawing code + # only expects 0, +1 or -1. + self.strand = 0 + else: + self.strand = int(self._feature.strand) # Feature strand + if "color" in self._feature.qualifiers: # Artemis color (if present) + self.color = self._colortranslator.artemis_color( + self._feature.qualifiers["color"][0] + ) + self.name = self.type + for qualifier in self.name_qualifiers: + if qualifier in self._feature.qualifiers: + self.name = self._feature.qualifiers[qualifier][0] + break + # Note will be 0 to N for origin wrapping feature on genome of length N + self.start, self.end = min(bounds), max(bounds) + + def get_feature(self): + """Return the unwrapped Bio.SeqFeature object.""" + return self._feature + + def set_colour(self, colour): + """Backwards compatible variant of set_color(self, color) using UK spelling.""" + color = self._colortranslator.translate(colour) + self.color = color + + def set_color(self, color): + """Set the color in which the feature will be drawn. + + Arguments: + - color The color to draw the feature - either a colors.Color + object, an RGB tuple of floats, or an integer corresponding a + colors in colors.txt + + """ + # TODO - Make this into the set method for a color property? + color = self._colortranslator.translate(color) + self.color = color + + def __getattr__(self, name): + """Get attribute by name. + + If the Feature class doesn't have the attribute called for, + check in self._feature for it. 
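+
+        Raises AttributeError if neither the Feature wrapper nor the
+        wrapped SeqFeature has the attribute.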
+
+        """
+        return getattr(self._feature, name)  # try to get the attribute from the feature
+
+
+################################################################################
+# RUN AS SCRIPT
+################################################################################
+
+if __name__ == "__main__":
+
+    # Test code
+    gdf = Feature()
diff --git a/code/lib/Bio/Graphics/GenomeDiagram/_FeatureSet.py b/code/lib/Bio/Graphics/GenomeDiagram/_FeatureSet.py
new file mode 100644
index 0000000..4168a29
--- /dev/null
+++ b/code/lib/Bio/Graphics/GenomeDiagram/_FeatureSet.py
@@ -0,0 +1,210 @@
+# Copyright 2003-2008 by Leighton Pritchard. All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+#
+# Contact:       Leighton Pritchard, The James Hutton Institute,
+#                Invergowrie, Dundee, Scotland, DD2 5DA, UK
+#                Leighton.Pritchard@hutton.ac.uk
+################################################################################
+#
+# Thanks to Peter Cock for the impetus to write the get_features() code to
+# subselect Features.
+#
+################################################################################
+
+"""FeatureSet module.
+
+Provides:
+ - FeatureSet - container for Feature objects
+
+For drawing capabilities, this module uses reportlab to draw and write
+the diagram: http://www.reportlab.com
+"""
+
+
+# GenomeDiagram
+from ._Feature import Feature
+
+# Builtins
+import re
+
+
+class FeatureSet:
+    """FeatureSet object."""
+
+    def __init__(self, set_id=None, name=None, parent=None):
+        """Create the object.
+
+        Arguments:
+         - set_id: Unique id for the set
+         - name: String identifying the feature set
+
+        """
+        self.parent = parent
+        self.id = set_id  # Unique id for the set
+        self.next_id = 0  # counter for unique feature ids
+        self.features = {}  # Holds features, keyed by ID
+        self.name = name  # String describing the set
+
+    def add_feature(self, feature, **kwargs):
+        """Add a new feature.
+
+        Arguments:
+         - feature: Bio.SeqFeature object
+         - kwargs: Keyword arguments for Feature. Named attributes
+           of the Feature
+
+        Add a Bio.SeqFeature object to the diagram (will be stored
+        internally in a Feature wrapper).
+        """
+        id = self.next_id  # get id number
+        f = Feature(self, id, feature)
+        self.features[id] = f  # add feature
+        for key in kwargs:
+            if key == "colour" or key == "color":
+                # Deal with "colour" as a special case by also mapping to color.
+                # If Feature.py used a python property we wouldn't need to call
+                # set_color explicitly. However, this is important to make sure
+                # every color gets mapped to a colors object - for example color
+                # numbers, or strings (may not matter for PDF, but does for PNG).
+                self.features[id].set_color(kwargs[key])
+                continue
+            setattr(self.features[id], key, kwargs[key])
+        self.next_id += 1  # increment next id
+        return f
+
+    def del_feature(self, feature_id):
+        """Delete a feature.
+
+        Arguments:
+         - feature_id: Unique id of the feature to delete
+
+        Remove a feature from the set, indicated by its id.
+        """
+        del self.features[feature_id]
+
+    def set_all_features(self, attr, value):
+        """Set an attribute of all the features.
+
+        Arguments:
+         - attr: An attribute of the Feature class
+         - value: The value to set that attribute to
+
+        Set the passed attribute of all features in the set to the
+        passed value.
+ """ + for feature in self.features.values(): + if hasattr(feature, attr): + # If the feature has the attribute, set it to the passed value + setattr(feature, attr, value) + + # For backwards compatibility, we support both colour and color. + # As a quick hack, make "colour" set both "colour" and "color". + # if attr=="colour": + # self.set_all_feature("color",value) + + def get_features(self, attribute=None, value=None, comparator=None): + """Retrieve features. + + Arguments: + - attribute: String, attribute of a Feature object + - value: The value desired of the attribute + - comparator: String, how to compare the Feature attribute to the + passed value + + If no attribute or value is given, return a list of all features in the + feature set. If both an attribute and value are given, then depending + on the comparator, then a list of all features in the FeatureSet + matching (or not) the passed value will be returned. Allowed comparators + are: 'startswith', 'not', 'like'. + + The user is expected to make a responsible decision about which feature + attributes to use with which passed values and comparator settings. + """ + # If no attribute or value specified, return all features + if attribute is None or value is None: + return list(self.features.values()) + # If no comparator is specified, return all features where the attribute + # value matches that passed + if comparator is None: + return [ + feature + for feature in self.features.values() + if getattr(feature, attribute) == value + ] + # If the comparator is 'not', return all features where the attribute + # value does not match that passed + elif comparator == "not": + return [ + feature + for feature in self.features.values() + if getattr(feature, attribute) != value + ] + # If the comparator is 'startswith', return all features where the attribute + # value does not match that passed + elif comparator == "startswith": + return [ + feature + for feature in self.features.values() + if getattr(feature, attribute).startswith(value) + ] + # If the comparator is 'like', use a regular expression search to identify + # features + elif comparator == "like": + return [ + feature + for feature in self.features.values() + if re.search(value, getattr(feature, attribute)) + ] + # As a final option, just return an empty list + return [] + + def get_ids(self): + """Return a list of all ids for the feature set.""" + return list(self.features.keys()) + + def range(self): + """Return the lowest and highest base (or mark) numbers as a tuple.""" + lows, highs = [], [] + for feature in self.features.values(): + for start, end in feature.locations: + lows.append(start) + highs.append(end) + if len(lows) != 0 and len(highs) != 0: # Default in case there is + return (min(lows), max(highs)) # nothing in the set + return 0, 0 + + def to_string(self, verbose=0): + """Return a formatted string with information about the set. 
+ + Arguments: + - verbose: Boolean indicating whether a short (default) or + complete account of the set is required + + """ + if not verbose: # Short account only required + return "%s" % self + else: # Long account desired + outstr = ["\n<%s: %s>" % (self.__class__, self.name)] + outstr.append("%d features" % len(self.features)) + for key in self.features: + outstr.append("feature: %s" % self.features[key]) + return "\n".join(outstr) + + def __len__(self): + """Return the number of features in the set.""" + return len(self.features) + + def __getitem__(self, key): + """Return a feature, keyed by id.""" + return self.features[key] + + def __str__(self): + """Return a formatted string with information about the feature set.""" + outstr = [ + "\n<%s: %s %d features>" % (self.__class__, self.name, len(self.features)) + ] + return "\n".join(outstr) diff --git a/code/lib/Bio/Graphics/GenomeDiagram/_Graph.py b/code/lib/Bio/Graphics/GenomeDiagram/_Graph.py new file mode 100644 index 0000000..7f99ef9 --- /dev/null +++ b/code/lib/Bio/Graphics/GenomeDiagram/_Graph.py @@ -0,0 +1,195 @@ +# Copyright 2003-2008 by Leighton Pritchard. All rights reserved. +# Revisions copyright 2008-2009 by Peter Cock. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +# +# Contact: Leighton Pritchard, The James Hutton Institute, +# Invergowrie, Dundee, Scotland, DD2 5DA, UK +# Leighton.Pritchard@hutton.ac.uk +################################################################################ + +"""Graph module. + +Provides: + - GraphData - Contains data from which a graph will be drawn, and + information about its presentation + +For drawing capabilities, this module uses reportlab to draw and write +the diagram: http://www.reportlab.com +""" + +# ReportLab imports + +from reportlab.lib import colors + +from math import sqrt + + +class GraphData: + """Graph Data. + + Attributes: + - id Unique identifier for the data + - data Dictionary of describing the data, keyed by position + - name String describing the data + - style String ('bar', 'heat', 'line') describing how to draw the data + - poscolor colors.Color for drawing high (some styles) or all + values + - negcolor colors.Color for drawing low values (some styles) + - linewidth Int, thickness to draw the line in 'line' styles + + """ + + def __init__( + self, + id=None, + data=None, + name=None, + style="bar", + color=colors.lightgreen, + altcolor=colors.darkseagreen, + center=None, + colour=None, + altcolour=None, + ): + """Initialize. + + Arguments: + - id Unique ID for the graph + - data List of (position, value) tuples + - name String describing the graph + - style String describing the presentation style ('bar', 'line', + 'heat') + - color colors.Color describing the color to draw all or the + 'high' (some styles) values (overridden by backwards + compatible argument with UK spelling, colour). + - altcolor colors.Color describing the color to draw the 'low' + values (some styles only) (overridden by backwards + compatible argument with UK spelling, colour). + - center Value at which x-axis crosses y-axis. 
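+
+        A minimal construction sketch (values are illustrative):
+
+            gc = GraphData(id=0,
+                           data=[(0, 0.42), (1000, 0.55), (2000, 0.48)],
+                           name="GC content",
+                           style="line")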
+ + """ + # Let the UK spelling (colour) override the USA spelling (color) + if colour is not None: + color = colour + if altcolour is not None: + altcolor = altcolour + + self.id = id # Unique identifier for the graph + self.data = {} # holds values, keyed by sequence position + if data is not None: + self.set_data(data) + self.name = name # Descriptive string + + # Attributes describing how the graph will be drawn + self.style = style # One of 'bar', 'heat' or 'line' + self.poscolor = color # Color to draw all, or 'high' values + self.negcolor = altcolor # Color to draw 'low' values + self.linewidth = 2 # linewidth to use in line graphs + self.center = center # value at which x-axis crosses y-axis + + def set_data(self, data): + """Add data as a list of (position, value) tuples.""" + for (pos, val) in data: # Fill data dictionary + self.data[pos] = val + + def get_data(self): + """Return data as a list of sorted (position, value) tuples.""" + data = [] + for xval in self.data: + yval = self.data[xval] + data.append((xval, yval)) + data.sort() + return data + + def add_point(self, point): + """Add a single point to the set of data as a (position, value) tuple.""" + pos, val = point + self.data[pos] = val + + def quartiles(self): + """Return (minimum, lowerQ, medianQ, upperQ, maximum) values as tuple.""" + data = sorted(self.data.values()) + datalen = len(data) + return ( + data[0], + data[datalen // 4], + data[datalen // 2], + data[3 * datalen // 4], + data[-1], + ) + + def range(self): + """Return range of data as (start, end) tuple. + + Returns the range of the data, i.e. its start and end points on + the genome as a (start, end) tuple. + """ + positions = sorted(self.data) # i.e. dict keys + # Return first and last positions in graph + # print(len(self.data)) + return (positions[0], positions[-1]) + + def mean(self): + """Return the mean value for the data points (float).""" + data = list(self.data.values()) + sum = 0.0 + for item in data: + sum += float(item) + return sum / len(data) + + def stdev(self): + """Return the sample standard deviation for the data (float).""" + data = list(self.data.values()) + m = self.mean() + runtotal = 0.0 + for entry in data: + runtotal += float((entry - m) ** 2) + # This is sample standard deviation; population stdev would involve + # division by len(data), rather than len(data)-1 + return sqrt(runtotal / (len(data) - 1)) + + def __len__(self): + """Return the number of points in the data set.""" + return len(self.data) + + def __getitem__(self, index): + """Return data value(s) at the given position. + + Given an integer representing position on the sequence + returns a float - the data value at the passed position. + + If a slice, returns graph data from the region as a list or + (position, value) tuples. Slices with step are not supported. + """ + if isinstance(index, int): + return self.data[index] + elif isinstance(index, slice): + # TODO - Why does it treat the end points both as inclusive? + # This doesn't match Python norms does it? 
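+            # Note: both slice end points are treated as inclusive here, so
+            # graph[1000:2000] returns the points at positions 1000..2000
+            # inclusive - unlike normal Python slicing, where the stop
+            # index is excluded.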
+ low = index.start + high = index.stop + if index.step is not None and index.step != 1: + raise ValueError + outlist = [] + for pos in sorted(self.data): + if pos >= low and pos <= high: + outlist.append((pos, self.data[pos])) + return outlist + else: + raise TypeError("Need an integer or a slice") + + def __str__(self): + """Return a string describing the graph data.""" + outstr = ["\nGraphData: %s, ID: %s" % (self.name, self.id)] + outstr.append("Number of points: %d" % len(self.data)) + outstr.append("Mean data value: %s" % self.mean()) + outstr.append("Sample SD: %.3f" % self.stdev()) + outstr.append( + "Minimum: %s\n1Q: %s\n2Q: %s\n3Q: %s\nMaximum: %s" % self.quartiles() + ) + outstr.append("Sequence Range: %s..%s" % self.range()) + return "\n".join(outstr) diff --git a/code/lib/Bio/Graphics/GenomeDiagram/_GraphSet.py b/code/lib/Bio/Graphics/GenomeDiagram/_GraphSet.py new file mode 100644 index 0000000..d79e6ef --- /dev/null +++ b/code/lib/Bio/Graphics/GenomeDiagram/_GraphSet.py @@ -0,0 +1,171 @@ +# Copyright 2003-2008 by Leighton Pritchard. All rights reserved. +# Revisions copyright 2008-2010 by Peter Cock. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +# +# Contact: Leighton Pritchard, The James Hutton Institute, +# Invergowrie, Dundee, Scotland, DD2 5DA, UK +# Leighton.Pritchard@hutton.ac.uk +################################################################################ +# +# TODO: Make representation of Ymax and Ymin values at this level, so that +# calculation of graph/axis drawing is simplified + +"""GraphSet module. + +Provides: + - GraphSet - container for GraphData objects + +For drawing capabilities, this module uses reportlab to draw and write +the diagram: http://www.reportlab.com +""" + +# ReportLab imports + +from reportlab.lib import colors + +from ._Graph import GraphData + + +class GraphSet: + """Graph Set. + + Attributes: + - id Unique identifier for the set + - name String describing the set + + """ + + def __init__(self, name=None): + """Initialize. + + Arguments: + - name String identifying the graph set sensibly + + """ + self.id = id # Unique identifier for the set + self._next_id = 0 # Holds unique ids for graphs + self._graphs = {} # Holds graphs, keyed by unique id + self.name = name # Holds description of graph + + def new_graph( + self, + data, + name=None, + style="bar", + color=colors.lightgreen, + altcolor=colors.darkseagreen, + linewidth=1, + center=None, + colour=None, + altcolour=None, + centre=None, + ): + """Add a GraphData object to the diagram. + + Arguments: + - data List of (position, value) int tuples + - name String, description of the graph + - style String ('bar', 'heat', 'line') describing how the graph + will be drawn + - color colors.Color describing the color to draw all or 'high' + (some styles) data (overridden by backwards compatible + argument with UK spelling, colour). + - altcolor colors.Color describing the color to draw 'low' (some + styles) data (overridden by backwards compatible argument + with UK spelling, colour). + - linewidth Float describing linewidth for graph + - center Float setting the value at which the x-axis + crosses the y-axis (overridden by backwards + compatible argument with UK spelling, centre) + + Add a GraphData object to the diagram (will be stored internally). 
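+
+        For example (a sketch with illustrative values):
+
+            gset = GraphSet(name="GC skew")
+            gset.new_graph([(0, -0.1), (500, 0.25), (1000, 0.05)],
+                           name="GC skew", style="bar", center=0.0)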
+ """ + # Let the UK spelling (colour) override the USA spelling (color) + if colour is not None: + color = colour + if altcolour is not None: + altcolor = altcolour + if centre is not None: + center = centre + + id = self._next_id # get id number + graph = GraphData(id, data, name, style, color, altcolor, center) + graph.linewidth = linewidth + self._graphs[id] = graph # add graph data + self._next_id += 1 # increment next id + return graph + + def del_graph(self, graph_id): + """Remove a graph from the set, indicated by its id.""" + del self._graphs[graph_id] + + def get_graphs(self): + """Return list of all graphs in the graph set, sorted by id. + + Sorting is to ensure reliable stacking. + """ + return [self._graphs[id] for id in sorted(self._graphs)] + + def get_ids(self): + """Return a list of all ids for the graph set.""" + return list(self._graphs.keys()) + + def range(self): + """Return the lowest and highest base (or mark) numbers as a tuple.""" + lows, highs = [], [] + for graph in self._graphs.values(): + low, high = graph.range() + lows.append(low) + highs.append(high) + return (min(lows), max(highs)) + + def data_quartiles(self): + """Return (minimum, lowerQ, medianQ, upperQ, maximum) values as a tuple.""" + data = [] + for graph in self._graphs.values(): + data += list(graph.data.values()) + data.sort() + datalen = len(data) + return ( + data[0], + data[datalen / 4], + data[datalen / 2], + data[3 * datalen / 4], + data[-1], + ) + + def to_string(self, verbose=0): + """Return a formatted string with information about the set. + + Arguments: + - verbose - Flag indicating whether a short or complete account + of the set is required + + """ + if not verbose: + return "%s" % self + else: + outstr = ["\n<%s: %s>" % (self.__class__, self.name)] + outstr.append("%d graphs" % len(self._graphs)) + for key in self._graphs: + outstr.append("%s" % self._graphs[key]) + return "\n".join(outstr) + + def __len__(self): + """Return the number of graphs in the set.""" + return len(self._graphs) + + def __getitem__(self, key): + """Return a graph, keyed by id.""" + return self._graphs[key] + + def __str__(self): + """Return a formatted string with information about the feature set.""" + outstr = ["\n<%s: %s>" % (self.__class__, self.name)] + outstr.append("%d graphs" % len(self._graphs)) + outstr = "\n".join(outstr) + return outstr diff --git a/code/lib/Bio/Graphics/GenomeDiagram/_LinearDrawer.py b/code/lib/Bio/Graphics/GenomeDiagram/_LinearDrawer.py new file mode 100644 index 0000000..36012ad --- /dev/null +++ b/code/lib/Bio/Graphics/GenomeDiagram/_LinearDrawer.py @@ -0,0 +1,1580 @@ +# Copyright 2003-2008 by Leighton Pritchard. All rights reserved. +# Revisions copyright 2008-2009 by Peter Cock. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +# +# Contact: Leighton Pritchard, The James Hutton Institute, +# Invergowrie, Dundee, Scotland, DD2 5DA, UK +# Leighton.Pritchard@hutton.ac.uk +################################################################################ + +"""Linear Drawer module. 
+ +Provides: + - LinearDrawer - Drawing object for linear diagrams + +For drawing capabilities, this module uses reportlab to draw and write +the diagram: http://www.reportlab.com +""" + +# ReportLab imports + +from reportlab.graphics.shapes import Drawing, Line, String, Group, Polygon +from reportlab.lib import colors + +# GenomeDiagram imports +from ._AbstractDrawer import AbstractDrawer, draw_box, draw_arrow +from ._AbstractDrawer import draw_cut_corner_box, _stroke_and_fill_colors +from ._AbstractDrawer import intermediate_points, angle2trig, deduplicate +from ._FeatureSet import FeatureSet +from ._GraphSet import GraphSet + +from math import ceil + + +class LinearDrawer(AbstractDrawer): + """Linear Drawer. + + Inherits from: + - AbstractDrawer + + Attributes: + - tracklines Boolean for whether to draw lines delineating tracks + - pagesize Tuple describing the size of the page in pixels + - x0 Float X co-ord for leftmost point of drawable area + - xlim Float X co-ord for rightmost point of drawable area + - y0 Float Y co-ord for lowest point of drawable area + - ylim Float Y co-ord for topmost point of drawable area + - pagewidth Float pixel width of drawable area + - pageheight Float pixel height of drawable area + - xcenter Float X co-ord of center of drawable area + - ycenter Float Y co-ord of center of drawable area + - start Int, base to start drawing from + - end Int, base to stop drawing at + - length Int, size of sequence to be drawn + - fragments Int, number of fragments into which to divide the + drawn sequence + - fragment_size Float (0->1) the proportion of the fragment height to + draw in + - track_size Float (0->1) the proportion of the track height to + draw in + - drawing Drawing canvas + - drawn_tracks List of ints denoting which tracks are to be drawn + - current_track_level Int denoting which track is currently being + drawn + - fragment_height Float total fragment height in pixels + - fragment_bases Int total fragment length in bases + - fragment_lines Dictionary of top and bottom y-coords of fragment, + keyed by fragment number + - fragment_limits Dictionary of start and end bases of each fragment, + keyed by fragment number + - track_offsets Dictionary of number of pixels that each track top, + center and bottom is offset from the base of a fragment, keyed by track + - cross_track_links List of tuples each with four entries (track A, + feature A, track B, feature B) to be linked. + + """ + + def __init__( + self, + parent=None, + pagesize="A3", + orientation="landscape", + x=0.05, + y=0.05, + xl=None, + xr=None, + yt=None, + yb=None, + start=None, + end=None, + tracklines=0, + fragments=10, + fragment_size=None, + track_size=0.75, + cross_track_links=None, + ): + """Initialize. 
+ + Arguments: + - parent Diagram object containing the data that the drawer draws + - pagesize String describing the ISO size of the image, or a tuple + of pixels + - orientation String describing the required orientation of the + final drawing ('landscape' or 'portrait') + - x Float (0->1) describing the relative size of the X + margins to the page + - y Float (0->1) describing the relative size of the Y + margins to the page + - xl Float (0->1) describing the relative size of the left X + margin to the page (overrides x) + - xl Float (0->1) describing the relative size of the left X + margin to the page (overrides x) + - xr Float (0->1) describing the relative size of the right X + margin to the page (overrides x) + - yt Float (0->1) describing the relative size of the top Y + margin to the page (overrides y) + - yb Float (0->1) describing the relative size of the lower Y + margin to the page (overrides y) + - start Int, the position to begin drawing the diagram at + - end Int, the position to stop drawing the diagram at + - tracklines Boolean flag to show (or not) lines delineating tracks + on the diagram + - fragments Int, the number of equal fragments into which the + sequence should be divided for drawing + - fragment_size Float(0->1) The proportion of the available height + for the fragment that should be taken up in drawing + - track_size The proportion of the available track height that + should be taken up in drawing + - cross_track_links List of tuples each with four entries (track A, + feature A, track B, feature B) to be linked. + """ + # Use the superclass' instantiation method + AbstractDrawer.__init__( + self, + parent, + pagesize, + orientation, + x, + y, + xl, + xr, + yt, + yb, + start, + end, + tracklines, + cross_track_links, + ) + + # Useful measurements on the page + self.fragments = fragments + if fragment_size is not None: + self.fragment_size = fragment_size + else: + if self.fragments == 1: + # For single fragments, default to full height + self.fragment_size = 1 + else: + # Otherwise keep a 10% gap between fragments + self.fragment_size = 0.9 + self.track_size = track_size + + def draw(self): + """Draw a linear diagram of the data in the parent Diagram object.""" + # Instantiate the drawing canvas + self.drawing = Drawing(self.pagesize[0], self.pagesize[1]) + + feature_elements = [] # holds feature elements + feature_labels = [] # holds feature labels + greytrack_bgs = [] # holds track background + greytrack_labels = [] # holds track foreground labels + scale_axes = [] # holds scale axes + scale_labels = [] # holds scale axis labels + + # Get the tracks to be drawn + self.drawn_tracks = self._parent.get_drawn_levels() + + # Set fragment and track sizes + self.init_fragments() + self.set_track_heights() + + # Go through each track in the parent (if it is to be drawn) one by + # one and collate the data as drawing elements + for track_level in self.drawn_tracks: # only use tracks to be drawn + self.current_track_level = track_level # establish track level + track = self._parent[track_level] # get the track at that level + gbgs, glabels = self.draw_greytrack(track) # get greytrack elements + greytrack_bgs.append(gbgs) + greytrack_labels.append(glabels) + features, flabels = self.draw_track(track) # get feature and graph elements + feature_elements.append(features) + feature_labels.append(flabels) + if track.scale: + axes, slabels = self.draw_scale(track) # get scale elements + scale_axes.append(axes) + scale_labels.append(slabels) + + feature_cross_links = [] + for 
cross_link_obj in self.cross_track_links: + cross_link_elements = self.draw_cross_link(cross_link_obj) + if cross_link_elements: + feature_cross_links.append(cross_link_elements) + + # Groups listed in order of addition to page (from back to front) + # Draw track backgrounds + # Draw feature cross track links + # Draw features and graphs + # Draw scale axes + # Draw scale labels + # Draw feature labels + # Draw track labels + element_groups = [ + greytrack_bgs, + feature_cross_links, + feature_elements, + scale_axes, + scale_labels, + feature_labels, + greytrack_labels, + ] + for element_group in element_groups: + for element_list in element_group: + [self.drawing.add(element) for element in element_list] + + if self.tracklines: # Draw test tracks over top of diagram + self.draw_test_tracks() + + def init_fragments(self): + """Initialize useful values for positioning diagram elements.""" + # Set basic heights, lengths etc + self.fragment_height = ( + 1.0 * self.pageheight / self.fragments + ) # total fragment height in pixels + self.fragment_bases = ceil( + 1.0 * self.length / self.fragments + ) # fragment length in bases + + # Key fragment base and top lines by fragment number + # Holds bottom and top line locations of fragments, keyed by fragment number + self.fragment_lines = {} + # Number of pixels to crop the fragment: + fragment_crop = (1 - self.fragment_size) / 2 + fragy = self.ylim # Holder for current absolute fragment base + for fragment in range(self.fragments): + fragtop = fragy - fragment_crop * self.fragment_height # top - crop + fragbtm = ( + fragy - (1 - fragment_crop) * self.fragment_height + ) # bottom + crop + self.fragment_lines[fragment] = (fragbtm, fragtop) + fragy -= self.fragment_height # next fragment base + + # Key base starts and ends for each fragment by fragment number + self.fragment_limits = {} # Holds first and last base positions in a fragment + fragment_step = self.fragment_bases # bases per fragment + fragment_count = 0 + # Add start and end positions for each fragment to dictionary + for marker in range(int(self.start), int(self.end), int(fragment_step)): + self.fragment_limits[fragment_count] = (marker, marker + fragment_step) + fragment_count += 1 + + def set_track_heights(self): + """Set track heights. + + Since tracks may not be of identical heights, the bottom and top + offsets of each track relative to the fragment top and bottom is + stored in a dictionary - self.track_offsets, keyed by track number. + """ + bot_track = min(min(self.drawn_tracks), 1) + top_track = max(self.drawn_tracks) # The 'highest' track number to draw + + trackunit_sum = 0 # Total number of 'units' for the tracks + trackunits = {} # The start and end units for each track, keyed by track number + heightholder = 0 # placeholder variable + for track in range(bot_track, top_track + 1): # for all track numbers to 'draw' + try: + trackheight = self._parent[track].height # Get track height + except Exception: # TODO: IndexError? 
+ trackheight = 1 # ...or default to 1 + trackunit_sum += trackheight # increment total track unit height + trackunits[track] = (heightholder, heightholder + trackheight) + heightholder += trackheight # move to next height + trackunit_height = ( + 1.0 * self.fragment_height * self.fragment_size / trackunit_sum + ) + + # Calculate top and bottom offsets for each track, relative to fragment + # base + track_offsets = {} # The offsets from fragment base for each track + track_crop = ( + trackunit_height * (1 - self.track_size) / 2.0 + ) # 'step back' in pixels + assert track_crop >= 0 + for track in trackunits: + top = trackunits[track][1] * trackunit_height - track_crop # top offset + btm = trackunits[track][0] * trackunit_height + track_crop # bottom offset + ctr = btm + (top - btm) / 2.0 # center offset + track_offsets[track] = (btm, ctr, top) + self.track_offsets = track_offsets + + def draw_test_tracks(self): + """Draw test tracks. + + Draw red lines indicating the top and bottom of each fragment, + and blue ones indicating tracks to be drawn. + """ + # Add lines for each fragment + for fbtm, ftop in self.fragment_lines.values(): + self.drawing.add( + Line(self.x0, ftop, self.xlim, ftop, strokeColor=colors.red) + ) # top line + self.drawing.add( + Line(self.x0, fbtm, self.xlim, fbtm, strokeColor=colors.red) + ) # bottom line + + # Add track lines for this fragment - but only for drawn tracks + for track in self.drawn_tracks: + trackbtm = fbtm + self.track_offsets[track][0] + trackctr = fbtm + self.track_offsets[track][1] + tracktop = fbtm + self.track_offsets[track][2] + self.drawing.add( + Line( + self.x0, tracktop, self.xlim, tracktop, strokeColor=colors.blue + ) + ) # top line + self.drawing.add( + Line( + self.x0, trackctr, self.xlim, trackctr, strokeColor=colors.green + ) + ) # center line + self.drawing.add( + Line( + self.x0, trackbtm, self.xlim, trackbtm, strokeColor=colors.blue + ) + ) # bottom line + + def draw_track(self, track): + """Draw track. + + Arguments: + - track Track object + + Returns a tuple (list of elements in the track, list of labels in + the track). + """ + track_elements = [] # Holds elements from features and graphs + track_labels = [] # Holds labels from features and graphs + + # Distribution dictionary for dealing with different set types + set_methods = {FeatureSet: self.draw_feature_set, GraphSet: self.draw_graph_set} + + for set in track.get_sets(): # Draw the feature or graph sets + elements, labels = set_methods[set.__class__](set) + track_elements += elements + track_labels += labels + return track_elements, track_labels + + def draw_tick(self, tickpos, ctr, ticklen, track, draw_label): + """Draw tick. + + Arguments: + - tickpos Int, position of the tick on the sequence + - ctr Float, Y co-ord of the center of the track + - ticklen How long to draw the tick + - track Track, the track the tick is drawn on + - draw_label Boolean, write the tick label? 
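+
+        For example, with track.scale_format == "SInt" a tick at
+        1,500,000 bp is labelled "1 Mbp" (positions are integer-divided
+        by the unit, so labels round down).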
+ + Returns a drawing element that is the tick on the scale + """ + if self.start >= tickpos and tickpos >= self.end: + raise RuntimeError( + "Tick at %i, but showing %i to %i" % (tickpos, self.start, self.end) + ) + if not ( + (track.start is None or track.start <= tickpos) + and (track.end is None or tickpos <= track.end) + ): + raise RuntimeError( + "Tick at %i, but showing %r to %r for track" + % (tickpos, track.start, track.end) + ) + fragment, tickx = self.canvas_location(tickpos) # Tick co-ordinates + assert fragment >= 0, "Fragment %i, tickpos %i" % (fragment, tickpos) + tctr = ctr + self.fragment_lines[fragment][0] # Center line of the track + tickx += self.x0 # Tick X co-ord + ticktop = tctr + ticklen # Y co-ord of tick top + tick = Line(tickx, tctr, tickx, ticktop, strokeColor=track.scale_color) + if draw_label: # Put tick position on as label + if track.scale_format == "SInt": + if tickpos >= 1000000: + tickstring = str(tickpos // 1000000) + " Mbp" + elif tickpos >= 1000: + tickstring = str(tickpos // 1000) + " Kbp" + else: + tickstring = str(tickpos) + else: + tickstring = str(tickpos) + label = String( + 0, + 0, + tickstring, # Make label string + fontName=track.scale_font, + fontSize=track.scale_fontsize, + fillColor=track.scale_color, + ) + labelgroup = Group(label) + rotation = angle2trig(track.scale_fontangle) + labelgroup.transform = ( + rotation[0], + rotation[1], + rotation[2], + rotation[3], + tickx, + ticktop, + ) + else: + labelgroup = None + return tick, labelgroup + + def draw_scale(self, track): + """Draw scale. + + Argument: + - track Track object + + Returns a tuple of (list of elements in the scale, list of labels + in the scale). + """ + scale_elements = [] # Holds axes and ticks + scale_labels = [] # Holds labels + + if not track.scale: # No scale required, exit early + return [], [] + + # Get track location + btm, ctr, top = self.track_offsets[self.current_track_level] + trackheight = top - ctr + + # For each fragment, draw the scale for this track + start, end = self._current_track_start_end() + start_f, start_x = self.canvas_location(start) + end_f, end_x = self.canvas_location(end) + + for fragment in range(start_f, end_f + 1): + tbtm = btm + self.fragment_lines[fragment][0] + tctr = ctr + self.fragment_lines[fragment][0] + ttop = top + self.fragment_lines[fragment][0] + # X-axis + if fragment == start_f: + x_left = start_x + else: + x_left = 0 + if fragment == end_f: + x_right = end_x + # Y-axis end marker + scale_elements.append( + Line( + self.x0 + x_right, + tbtm, + self.x0 + x_right, + ttop, + strokeColor=track.scale_color, + ) + ) + else: + x_right = self.xlim - self.x0 + scale_elements.append( + Line( + self.x0 + x_left, + tctr, + self.x0 + x_right, + tctr, + strokeColor=track.scale_color, + ) + ) + # Y-axis start marker + scale_elements.append( + Line( + self.x0 + x_left, + tbtm, + self.x0 + x_left, + ttop, + strokeColor=track.scale_color, + ) + ) + + start, end = self._current_track_start_end() + if track.scale_ticks: # Ticks are required on the scale + # Draw large ticks + # I want the ticks to be consistently positioned relative to + # the start of the sequence (position 0), not relative to the + # current viewpoint (self.start and self.end) + + ticklen = track.scale_largeticks * trackheight + tickiterval = int(track.scale_largetick_interval) + # Note that we could just start the list of ticks using + # range(0,self.end,tickinterval) and the filter out the + # ones before self.start - but this seems wasteful. 
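+            # Instead, start the range at the last multiple of the tick
+            # interval at or before self.start: e.g. with self.start = 1234
+            # and an interval of 1000, the loop below starts at 1000, and
+            # the bounds check inside it drops that tick, leaving 2000,
+            # 3000, ...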
+ # Using tickiterval * (self.start//tickiterval) is a shortcut. + for tickpos in range( + tickiterval * (self.start // tickiterval), int(self.end), tickiterval + ): + if tickpos <= start or end <= tickpos: + continue + tick, label = self.draw_tick( + tickpos, ctr, ticklen, track, track.scale_largetick_labels + ) + scale_elements.append(tick) + if label is not None: # If there's a label, add it + scale_labels.append(label) + # Draw small ticks + ticklen = track.scale_smallticks * trackheight + tickiterval = int(track.scale_smalltick_interval) + for tickpos in range( + tickiterval * (self.start // tickiterval), int(self.end), tickiterval + ): + if tickpos <= start or end <= tickpos: + continue + tick, label = self.draw_tick( + tickpos, ctr, ticklen, track, track.scale_smalltick_labels + ) + scale_elements.append(tick) + if label is not None: # If there's a label, add it + scale_labels.append(label) + + # Check to see if the track contains a graph - if it does, get the + # minimum and maximum values, and put them on the scale Y-axis + if track.axis_labels: + for set in track.get_sets(): # Check all sets... + if set.__class__ is GraphSet: # ...for a graph set + graph_label_min = [] + graph_label_mid = [] + graph_label_max = [] + for graph in set.get_graphs(): + quartiles = graph.quartiles() + minval, maxval = quartiles[0], quartiles[4] + if graph.center is None: + midval = (maxval + minval) / 2.0 + graph_label_min.append("%.3f" % minval) + graph_label_max.append("%.3f" % maxval) + else: + diff = max((graph.center - minval), (maxval - graph.center)) + minval = graph.center - diff + maxval = graph.center + diff + midval = graph.center + graph_label_mid.append("%.3f" % midval) + graph_label_min.append("%.3f" % minval) + graph_label_max.append("%.3f" % maxval) + for fragment in range( + start_f, end_f + 1 + ): # Add to all used fragment axes + tbtm = btm + self.fragment_lines[fragment][0] + tctr = ctr + self.fragment_lines[fragment][0] + ttop = top + self.fragment_lines[fragment][0] + if fragment == start_f: + x_left = start_x + else: + x_left = 0 + for val, pos in [ + (";".join(graph_label_min), tbtm), + (";".join(graph_label_max), ttop), + (";".join(graph_label_mid), tctr), + ]: + label = String( + 0, + 0, + val, + fontName=track.scale_font, + fontSize=track.scale_fontsize, + fillColor=track.scale_color, + ) + labelgroup = Group(label) + rotation = angle2trig(track.scale_fontangle) + labelgroup.transform = ( + rotation[0], + rotation[1], + rotation[2], + rotation[3], + self.x0 + x_left, + pos, + ) + scale_labels.append(labelgroup) + + return scale_elements, scale_labels + + def draw_greytrack(self, track): + """Draw greytrack. + + Arguments: + - track Track object + + Put in a grey background to the current track in all fragments, + if track specifies that we should. 
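+
+        If track.greytrack_labels is set to N, the track name is repeated
+        roughly N times across each fragment, spaced pagewidth / N apart.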
+ """ + greytrack_bgs = [] # Holds grey track backgrounds + greytrack_labels = [] # Holds grey foreground labels + + if not track.greytrack: # No greytrack required, return early + return [], [] + + # Get track location + btm, ctr, top = self.track_offsets[self.current_track_level] + + start, end = self._current_track_start_end() + start_fragment, start_offset = self.canvas_location(start) + end_fragment, end_offset = self.canvas_location(end) + + # Add greytrack to all fragments for this track + for fragment in range(start_fragment, end_fragment + 1): + tbtm = btm + self.fragment_lines[fragment][0] + tctr = ctr + self.fragment_lines[fragment][0] + ttop = top + self.fragment_lines[fragment][0] + if fragment == start_fragment: + x1 = self.x0 + start_offset + else: + x1 = self.x0 + if fragment == end_fragment: + x2 = self.x0 + end_offset + else: + x2 = self.xlim + box = draw_box( + (x1, tbtm), (x2, ttop), colors.Color(0.96, 0.96, 0.96) # Grey track bg + ) # is just a box + greytrack_bgs.append(box) + + if track.greytrack_labels: # If labels are required + # # how far apart should they be? + labelstep = self.pagewidth / track.greytrack_labels + label = String( + 0, + 0, + track.name, # label contents + fontName=track.greytrack_font, + fontSize=track.greytrack_fontsize, + fillColor=track.greytrack_fontcolor, + ) + # Create a new labelgroup at each position the label is required + for x in range(int(self.x0), int(self.xlim), int(labelstep)): + if fragment == start_fragment and x < start_offset: + continue + if ( + fragment == end_fragment + and end_offset < x + label.getBounds()[2] + ): + continue + labelgroup = Group(label) + rotation = angle2trig(track.greytrack_font_rotation) + labelgroup.transform = ( + rotation[0], + rotation[1], + rotation[2], + rotation[3], + x, + tbtm, + ) + if not self.xlim - x <= labelstep: + # Don't overlap the end of the track + greytrack_labels.append(labelgroup) + + return greytrack_bgs, greytrack_labels + + def draw_feature_set(self, set): + """Draw feature set. + + Arguments: + - set FeatureSet object + + Returns a tuple (list of elements describing features, list of + labels for elements). + """ + # print("draw feature set") + feature_elements = [] # Holds diagram elements belonging to the features + label_elements = [] # Holds diagram elements belonging to feature labels + + # Collect all the elements for the feature set + for feature in set.get_features(): + if self.is_in_bounds(feature.start) or self.is_in_bounds(feature.end): + features, labels = self.draw_feature(feature) # get elements and labels + feature_elements += features + label_elements += labels + + return feature_elements, label_elements + + def draw_feature(self, feature): + """Draw feature. + + Arguments: + - feature Feature containing location info + + Returns tuple of (list of elements describing single feature, list + of labels for those elements). + """ + if feature.hide: # Feature hidden, don't draw it... 
+ return [], [] + + feature_elements = [] # Holds diagram elements belonging to the feature + label_elements = [] # Holds labels belonging to the feature + + start, end = self._current_track_start_end() + # A single feature may be split into subfeatures, so loop over them + for locstart, locend in feature.locations: + if locend < start: + continue + locstart = max(locstart, start) + if end < locstart: + continue + locend = min(locend, end) + feature_boxes = self.draw_feature_location(feature, locstart, locend) + for box, label in feature_boxes: + feature_elements.append(box) + if label is not None: + label_elements.append(label) + + return feature_elements, label_elements + + def draw_feature_location(self, feature, locstart, locend): + """Draw feature location.""" + feature_boxes = [] + # Get start and end positions for feature/subfeatures + start_fragment, start_offset = self.canvas_location(locstart) + end_fragment, end_offset = self.canvas_location(locend) + # print("start_fragment, start_offset", start_fragment, start_offset) + # print("end_fragment, end_offset", end_fragment, end_offset) + # print("start, end", locstart, locend) + + # Note that there is a strange situation where a feature may be in + # several parts, and one or more of those parts may end up being + # drawn on a non-existent fragment. So we check that the start and + # end fragments do actually exist in terms of the drawing + allowed_fragments = list(self.fragment_limits.keys()) + if start_fragment in allowed_fragments and end_fragment in allowed_fragments: + # print(feature.name, feature.start, feature.end, start_offset, end_offset) + if start_fragment == end_fragment: # Feature is found on one fragment + feature_box, label = self.get_feature_sigil( + feature, start_offset, end_offset, start_fragment + ) + feature_boxes.append((feature_box, label)) + # feature_elements.append(feature_box) + # if label is not None: # There is a label for the feature + # label_elements.append(label) + else: # Feature is split over two or more fragments + fragment = start_fragment + start = start_offset + # The bit that runs up to the end of the first fragment, + # and any bits that subsequently span whole fragments + while self.fragment_limits[fragment][1] < locend: + # print(fragment, self.fragment_limits[fragment][1], locend) + feature_box, label = self.get_feature_sigil( + feature, start, self.pagewidth, fragment + ) + + fragment += 1 # move to next fragment + start = 0 # start next sigil from start of fragment + feature_boxes.append((feature_box, label)) + # feature_elements.append(feature_box) + # if label is not None: # There's a label for the feature + # label_elements.append(label) + # The last bit of the feature + # print(locend, self.end, fragment) + # print(self.fragment_bases, self.length) + feature_box, label = self.get_feature_sigil( + feature, 0, end_offset, fragment + ) + feature_boxes.append((feature_box, label)) + # if locstart > locend: + # print(locstart, locend, feature.strand, feature_boxes, feature.name) + return feature_boxes + + def draw_cross_link(self, cross_link): + """Draw cross-link between two features.""" + startA = cross_link.startA + startB = cross_link.startB + endA = cross_link.endA + endB = cross_link.endB + + if not self.is_in_bounds(startA) and not self.is_in_bounds(endA): + return None + if not self.is_in_bounds(startB) and not self.is_in_bounds(endB): + return None + + if startA < self.start: + startA = self.start + if startB < self.start: + startB = self.start + if self.end < endA: + endA = 
self.end + if self.end < endB: + endB = self.end + + trackobjA = cross_link._trackA(list(self._parent.tracks.values())) + trackobjB = cross_link._trackB(list(self._parent.tracks.values())) + assert trackobjA is not None + assert trackobjB is not None + if trackobjA == trackobjB: + raise NotImplementedError() + + if trackobjA.start is not None: + if endA < trackobjA.start: + return + startA = max(startA, trackobjA.start) + if trackobjA.end is not None: + if trackobjA.end < startA: + return + endA = min(endA, trackobjA.end) + if trackobjB.start is not None: + if endB < trackobjB.start: + return + startB = max(startB, trackobjB.start) + if trackobjB.end is not None: + if trackobjB.end < startB: + return + endB = min(endB, trackobjB.end) + + for track_level in self._parent.get_drawn_levels(): + track = self._parent[track_level] + if track == trackobjA: + trackA = track_level + if track == trackobjB: + trackB = track_level + if trackA == trackB: + raise NotImplementedError() + + strokecolor, fillcolor = _stroke_and_fill_colors( + cross_link.color, cross_link.border + ) + + allowed_fragments = list(self.fragment_limits.keys()) + + start_fragmentA, start_offsetA = self.canvas_location(startA) + end_fragmentA, end_offsetA = self.canvas_location(endA) + if ( + start_fragmentA not in allowed_fragments + or end_fragmentA not in allowed_fragments + ): + return + + start_fragmentB, start_offsetB = self.canvas_location(startB) + end_fragmentB, end_offsetB = self.canvas_location(endB) + if ( + start_fragmentB not in allowed_fragments + or end_fragmentB not in allowed_fragments + ): + return + + # TODO - Better drawing of flips when split between fragments + + answer = [] + for fragment in range( + min(start_fragmentA, start_fragmentB), max(end_fragmentA, end_fragmentB) + 1 + ): + btmA, ctrA, topA = self.track_offsets[trackA] + btmA += self.fragment_lines[fragment][0] + ctrA += self.fragment_lines[fragment][0] + topA += self.fragment_lines[fragment][0] + + btmB, ctrB, topB = self.track_offsets[trackB] + btmB += self.fragment_lines[fragment][0] + ctrB += self.fragment_lines[fragment][0] + topB += self.fragment_lines[fragment][0] + + if self.fragment_limits[fragment][1] < endA: + xAe = self.x0 + self.pagewidth + crop_rightA = True + else: + xAe = self.x0 + end_offsetA + crop_rightA = False + if self.fragment_limits[fragment][1] < endB: + xBe = self.x0 + self.pagewidth + crop_rightB = True + else: + xBe = self.x0 + end_offsetB + crop_rightB = False + + if fragment < start_fragmentA: + xAs = self.x0 + self.pagewidth + xAe = xAs + crop_leftA = False + elif fragment == start_fragmentA: + xAs = self.x0 + start_offsetA + crop_leftA = False + else: + xAs = self.x0 + crop_leftA = True + + if fragment < start_fragmentB: + xBs = self.x0 + self.pagewidth + xBe = xBs + crop_leftB = False + elif fragment == start_fragmentB: + xBs = self.x0 + start_offsetB + crop_leftB = False + else: + xBs = self.x0 + crop_leftB = True + + if ctrA < ctrB: + yA = topA + yB = btmB + else: + yA = btmA + yB = topB + + if fragment < start_fragmentB or end_fragmentB < fragment: + if cross_link.flip: + # Just draw A as a triangle to left/right + if fragment < start_fragmentB: + extra = [self.x0 + self.pagewidth, 0.5 * (yA + yB)] + else: + extra = [self.x0, 0.5 * (yA + yB)] + else: + if fragment < start_fragmentB: + extra = [ + self.x0 + self.pagewidth, + 0.7 * yA + 0.3 * yB, + self.x0 + self.pagewidth, + 0.3 * yA + 0.7 * yB, + ] + else: + extra = [ + self.x0, + 0.3 * yA + 0.7 * yB, + self.x0, + 0.7 * yA + 0.3 * yB, + ] + answer.append( + 
Polygon( + deduplicate([xAs, yA, xAe, yA] + extra), + strokeColor=strokecolor, + fillColor=fillcolor, + # default is mitre/miter which can stick out too much: + strokeLineJoin=1, # 1=round + strokewidth=0, + ) + ) + elif fragment < start_fragmentA or end_fragmentA < fragment: + if cross_link.flip: + # Just draw B as a triangle to left + if fragment < start_fragmentA: + extra = [self.x0 + self.pagewidth, 0.5 * (yA + yB)] + else: + extra = [self.x0, 0.5 * (yA + yB)] + else: + if fragment < start_fragmentA: + extra = [ + self.x0 + self.pagewidth, + 0.3 * yA + 0.7 * yB, + self.x0 + self.pagewidth, + 0.7 * yA + 0.3 * yB, + ] + else: + extra = [ + self.x0, + 0.7 * yA + 0.3 * yB, + self.x0, + 0.3 * yA + 0.7 * yB, + ] + answer.append( + Polygon( + deduplicate([xBs, yB, xBe, yB] + extra), + strokeColor=strokecolor, + fillColor=fillcolor, + # default is mitre/miter which can stick out too much: + strokeLineJoin=1, # 1=round + strokewidth=0, + ) + ) + elif cross_link.flip and ( + (crop_leftA and not crop_rightA) or (crop_leftB and not crop_rightB) + ): + # On left end of fragment... force "crossing" to margin + answer.append( + Polygon( + deduplicate( + [ + xAs, + yA, + xAe, + yA, + self.x0, + 0.5 * (yA + yB), + xBe, + yB, + xBs, + yB, + ] + ), + strokeColor=strokecolor, + fillColor=fillcolor, + # default is mitre/miter which can stick out too much: + strokeLineJoin=1, # 1=round + strokewidth=0, + ) + ) + elif cross_link.flip and ( + (crop_rightA and not crop_leftA) or (crop_rightB and not crop_leftB) + ): + # On right end... force "crossing" to margin + answer.append( + Polygon( + deduplicate( + [ + xAs, + yA, + xAe, + yA, + xBe, + yB, + xBs, + yB, + self.x0 + self.pagewidth, + 0.5 * (yA + yB), + ] + ), + strokeColor=strokecolor, + fillColor=fillcolor, + # default is mitre/miter which can stick out too much: + strokeLineJoin=1, # 1=round + strokewidth=0, + ) + ) + elif cross_link.flip: + answer.append( + Polygon( + deduplicate([xAs, yA, xAe, yA, xBs, yB, xBe, yB]), + strokeColor=strokecolor, + fillColor=fillcolor, + # default is mitre/miter which can stick out too much: + strokeLineJoin=1, # 1=round + strokewidth=0, + ) + ) + else: + answer.append( + Polygon( + deduplicate([xAs, yA, xAe, yA, xBe, yB, xBs, yB]), + strokeColor=strokecolor, + fillColor=fillcolor, + # default is mitre/miter which can stick out too much: + strokeLineJoin=1, # 1=round + strokewidth=0, + ) + ) + return answer + + def get_feature_sigil(self, feature, x0, x1, fragment, **kwargs): + """Get feature sigil. + + Arguments: + - feature Feature object + - x0 Start X co-ordinate on diagram + - x1 End X co-ordinate on diagram + - fragment The fragment on which the feature appears + + Returns a drawable indicator of the feature, and any required label + for it. 
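+
+        For example (sketch), to make the sigil clickable in SVG output,
+        give the feature a url attribute before drawing (the address below
+        is hypothetical):
+
+            feature.url = "https://example.org/gene/xyz"
+            feature.sigil = "BIGARROW"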
+ """ + # Establish co-ordinates for drawing + x0, x1 = self.x0 + x0, self.x0 + x1 + btm, ctr, top = self.track_offsets[self.current_track_level] + try: + btm += self.fragment_lines[fragment][0] + ctr += self.fragment_lines[fragment][0] + top += self.fragment_lines[fragment][0] + except Exception: # Only called if the method screws up big time + print("We've got a screw-up") + print("%s %s" % (self.start, self.end)) + print(self.fragment_bases) + print("%r %r" % (x0, x1)) + for locstart, locend in feature.locations: + print(self.canvas_location(locstart)) + print(self.canvas_location(locend)) + print("FEATURE\n%s" % feature) + raise + + # Distribution dictionary for various ways of drawing the feature + draw_methods = { + "BOX": self._draw_sigil_box, + "ARROW": self._draw_sigil_arrow, + "BIGARROW": self._draw_sigil_big_arrow, + "OCTO": self._draw_sigil_octo, + "JAGGY": self._draw_sigil_jaggy, + } + + method = draw_methods[feature.sigil] + kwargs["head_length_ratio"] = feature.arrowhead_length + kwargs["shaft_height_ratio"] = feature.arrowshaft_height + + # Support for clickable links... needs ReportLab 2.4 or later + # which added support for links in SVG output. + if hasattr(feature, "url"): + kwargs["hrefURL"] = feature.url + kwargs["hrefTitle"] = feature.name + + # Get sigil for the feature, give it the bounding box straddling + # the axis (it decides strand specific placement) + sigil = method( + btm, + ctr, + top, + x0, + x1, + strand=feature.strand, + color=feature.color, + border=feature.border, + **kwargs + ) + + if feature.label_strand: + strand = feature.label_strand + else: + strand = feature.strand + if feature.label: # Feature requires a label + label = String( + 0, + 0, + feature.name, + fontName=feature.label_font, + fontSize=feature.label_size, + fillColor=feature.label_color, + ) + labelgroup = Group(label) + # Feature is on top, or covers both strands (location affects + # the height and rotation of the label) + if strand != -1: + rotation = angle2trig(feature.label_angle) + if feature.label_position in ("end", "3'", "right"): + pos = x1 + elif feature.label_position in ("middle", "center", "centre"): + pos = (x1 + x0) / 2.0 + else: + # Default to start, i.e. 'start', "5'", 'left' + pos = x0 + labelgroup.transform = ( + rotation[0], + rotation[1], + rotation[2], + rotation[3], + pos, + top, + ) + else: # Feature on bottom strand + rotation = angle2trig(feature.label_angle + 180) + if feature.label_position in ("end", "3'", "right"): + pos = x0 + elif feature.label_position in ("middle", "center", "centre"): + pos = (x1 + x0) / 2.0 + else: + # Default to start, i.e. 'start', "5'", 'left' + pos = x1 + labelgroup.transform = ( + rotation[0], + rotation[1], + rotation[2], + rotation[3], + pos, + btm, + ) + else: + labelgroup = None + return sigil, labelgroup + + def draw_graph_set(self, set): + """Draw graph set. + + Arguments: + - set GraphSet object + + Returns tuple (list of graph elements, list of graph labels). + """ + # print('draw graph set') + elements = [] # Holds graph elements + + # Distribution dictionary for how to draw the graph + style_methods = { + "line": self.draw_line_graph, + "heat": self.draw_heat_graph, + "bar": self.draw_bar_graph, + } + + for graph in set.get_graphs(): + elements += style_methods[graph.style](graph) + + return elements, [] + + def draw_line_graph(self, graph): + """Return a line graph as a list of drawable elements. 
+ + Arguments: + - graph Graph object + + """ + # print('\tdraw_line_graph') + line_elements = [] # Holds drawable elements + + # Get graph data + data_quartiles = graph.quartiles() + minval, maxval = data_quartiles[0], data_quartiles[4] + btm, ctr, top = self.track_offsets[self.current_track_level] + trackheight = 0.5 * (top - btm) + datarange = maxval - minval + if datarange == 0: + datarange = trackheight + + start, end = self._current_track_start_end() + data = graph[start:end] + + # midval is the value at which the x-axis is plotted, and is the + # central ring in the track + if graph.center is None: + midval = (maxval + minval) / 2.0 + else: + midval = graph.center + # Whichever is the greatest difference: max-midval or min-midval, is + # taken to specify the number of pixel units resolved along the + # y-axis + resolution = max((midval - minval), (maxval - midval)) + + # Start from first data point + pos, val = data[0] + lastfrag, lastx = self.canvas_location(pos) + lastx += self.x0 # Start xy co-ords + lasty = ( + trackheight * (val - midval) / resolution + + self.fragment_lines[lastfrag][0] + + ctr + ) + lastval = val + # Add a series of lines linking consecutive data points + for pos, val in data: + frag, x = self.canvas_location(pos) + x += self.x0 # next xy co-ords + y = ( + trackheight * (val - midval) / resolution + + self.fragment_lines[frag][0] + + ctr + ) + if frag == lastfrag: # Points on the same fragment: draw the line + line_elements.append( + Line( + lastx, + lasty, + x, + y, + strokeColor=graph.poscolor, + strokeWidth=graph.linewidth, + ) + ) + else: # Points not on the same fragment, so interpolate + tempy = ( + trackheight * (val - midval) / resolution + + self.fragment_lines[lastfrag][0] + + ctr + ) + line_elements.append( + Line( + lastx, + lasty, + self.xlim, + tempy, + strokeColor=graph.poscolor, + strokeWidth=graph.linewidth, + ) + ) + tempy = ( + trackheight * (val - midval) / resolution + + self.fragment_lines[frag][0] + + ctr + ) + line_elements.append( + Line( + self.x0, + tempy, + x, + y, + strokeColor=graph.poscolor, + strokeWidth=graph.linewidth, + ) + ) + lastfrag, lastx, lasty, lastval = frag, x, y, val + + return line_elements + + def draw_heat_graph(self, graph): + """Return a list of drawable elements for the heat graph.""" + # print('\tdraw_heat_graph') + # At each point contained in the graph data, we draw a box that is the + # full height of the track, extending from the midpoint between the + # previous and current data points to the midpoint between the current + # and next data points + heat_elements = [] # Holds drawable elements for the graph + + # Get graph data and information + data_quartiles = graph.quartiles() + minval, maxval = data_quartiles[0], data_quartiles[4] + midval = (maxval + minval) / 2.0 # mid is the value at the X-axis + btm, ctr, top = self.track_offsets[self.current_track_level] + trackheight = top - btm + + start, end = self._current_track_start_end() + data = intermediate_points(start, end, graph[start:end]) + + if not data: + return [] + + # Create elements on the graph, indicating a large positive value by + # the graph's poscolor, and a large negative value by the graph's + # negcolor attributes + for pos0, pos1, val in data: + # assert start <= pos0 <= pos1 <= end + fragment0, x0 = self.canvas_location(pos0) + fragment1, x1 = self.canvas_location(pos1) + x0, x1 = self.x0 + x0, self.x0 + x1 # account for margin + # print('x1 before:', x1) + + # Calculate the heat color, based on the differential between + # the 
value and the median value + heat = colors.linearlyInterpolatedColor( + graph.poscolor, graph.negcolor, maxval, minval, val + ) + + # Draw heat box + if fragment0 == fragment1: # Box is contiguous on one fragment + if pos1 >= self.fragment_limits[fragment0][1]: + x1 = self.xlim + ttop = top + self.fragment_lines[fragment0][0] + tbtm = btm + self.fragment_lines[fragment0][0] + # print('equal', pos0, pos1, val) + # print(pos0, pos1, fragment0, fragment1) + heat_elements.append( + draw_box((x0, tbtm), (x1, ttop), color=heat, border=None) + ) + else: # box is split over two or more fragments + # if pos0 >= self.fragment_limits[fragment0][0]: + # fragment0 += 1 + fragment = fragment0 + start_x = x0 + while self.fragment_limits[fragment][1] <= pos1: + # print(pos0, self.fragment_limits[fragment][1], pos1) + ttop = top + self.fragment_lines[fragment][0] + tbtm = btm + self.fragment_lines[fragment][0] + heat_elements.append( + draw_box( + (start_x, tbtm), (self.xlim, ttop), color=heat, border=None + ) + ) + fragment += 1 + start_x = self.x0 + ttop = top + self.fragment_lines[fragment][0] + tbtm = btm + self.fragment_lines[fragment][0] + # Add the last part of the bar + # print('x1 after:', x1, '\n') + heat_elements.append( + draw_box((self.x0, tbtm), (x1, ttop), color=heat, border=None) + ) + + return heat_elements + + def draw_bar_graph(self, graph): + """Return list of drawable elements for a bar graph.""" + # print('\tdraw_bar_graph') + # At each point contained in the graph data, we draw a vertical bar + # from the track center to the height of the datapoint value (positive + # values go up in one color, negative go down in the alternative + # color). + bar_elements = [] # Holds drawable elements for the graph + + # Set the number of pixels per unit for the data + data_quartiles = graph.quartiles() + minval, maxval = data_quartiles[0], data_quartiles[4] + btm, ctr, top = self.track_offsets[self.current_track_level] + trackheight = 0.5 * (top - btm) + datarange = maxval - minval + if datarange == 0: + datarange = trackheight + data = graph[self.start : self.end] + # midval is the value at which the x-axis is plotted, and is the + # central ring in the track + if graph.center is None: + midval = (maxval + minval) / 2.0 + else: + midval = graph.center + + # Convert data into 'binned' blocks, covering half the distance to the + # next data point on either side, accounting for the ends of fragments + # and tracks + start, end = self._current_track_start_end() + data = intermediate_points(start, end, graph[start:end]) + + if not data: + return [] + + # Whichever is the greatest difference: max-midval or min-midval, is + # taken to specify the number of pixel units resolved along the + # y-axis + resolution = max((midval - minval), (maxval - midval)) + if resolution == 0: + resolution = trackheight + + # Create elements for the bar graph based on newdata + for pos0, pos1, val in data: + fragment0, x0 = self.canvas_location(pos0) + fragment1, x1 = self.canvas_location(pos1) + x0, x1 = self.x0 + x0, self.x0 + x1 # account for margin + barval = trackheight * (val - midval) / resolution + if barval >= 0: # Different colors for bars that extend above... 
+ barcolor = graph.poscolor + else: # ...or below the axis + barcolor = graph.negcolor + + # Draw bar + if fragment0 == fragment1: # Box is contiguous + if pos1 >= self.fragment_limits[fragment0][1]: + x1 = self.xlim + tctr = ctr + self.fragment_lines[fragment0][0] + barval += tctr + bar_elements.append(draw_box((x0, tctr), (x1, barval), color=barcolor)) + else: # Box is split over two or more fragments + fragment = fragment0 + # if pos0 >= self.fragment_limits[fragment0][0]: + # fragment += 1 + start = x0 + while self.fragment_limits[fragment][1] < pos1: + tctr = ctr + self.fragment_lines[fragment][0] + thisbarval = barval + tctr + bar_elements.append( + draw_box((start, tctr), (self.xlim, thisbarval), color=barcolor) + ) + fragment += 1 + start = self.x0 + tctr = ctr + self.fragment_lines[fragment1][0] + barval += tctr + # Add the last part of the bar + bar_elements.append( + draw_box((self.x0, tctr), (x1, barval), color=barcolor) + ) + + return bar_elements + + def canvas_location(self, base): + """Canvas location of a base on the genome. + + Arguments: + - base The base number on the genome sequence + + Returns the x-coordinate and fragment number of a base on the + genome sequence, in the context of the current drawing setup + """ + base = int(base - self.start) # number of bases we are from the start + fragment = int(base / self.fragment_bases) + if fragment < 1: # First fragment + base_offset = base + fragment = 0 + elif fragment >= self.fragments: + fragment = self.fragments - 1 + base_offset = self.fragment_bases + else: # Calculate number of bases from start of fragment + base_offset = base % self.fragment_bases + assert fragment < self.fragments, ( + base, + self.start, + self.end, + self.length, + self.fragment_bases, + ) + # Calculate number of pixels from start of fragment + x_offset = 1.0 * self.pagewidth * base_offset / self.fragment_bases + return fragment, x_offset + + def _draw_sigil_box(self, bottom, center, top, x1, x2, strand, **kwargs): + """Draw BOX sigil (PRIVATE).""" + if strand == 1: + y1 = center + y2 = top + elif strand == -1: + y1 = bottom + y2 = center + else: + y1 = bottom + y2 = top + return draw_box((x1, y1), (x2, y2), **kwargs) + + def _draw_sigil_octo(self, bottom, center, top, x1, x2, strand, **kwargs): + """Draw OCTO sigil, a box with the corners cut off (PRIVATE).""" + if strand == 1: + y1 = center + y2 = top + elif strand == -1: + y1 = bottom + y2 = center + else: + y1 = bottom + y2 = top + return draw_cut_corner_box((x1, y1), (x2, y2), **kwargs) + + def _draw_sigil_jaggy( + self, bottom, center, top, x1, x2, strand, color, border=None, **kwargs + ): + """Draw JAGGY sigil (PRIVATE). + + Although we may in future expose the head/tail jaggy lengths, for now + both the left and right edges are drawn jagged. 
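The fragment/offset arithmetic in `canvas_location` above reduces to a few lines of integer division and scaling; here is a minimal standalone sketch, with made-up drawing parameters (`start`, `fragment_bases`, `fragments`, and `pagewidth` are illustrative values, not taken from this capsule):

```python
# Sketch of the canvas_location() arithmetic, outside the drawer class.
start = 0             # first base drawn
fragment_bases = 250  # bases per horizontal fragment (row)
fragments = 4         # number of fragments on the page
pagewidth = 600.0     # drawable width in points

def canvas_location(base):
    base = int(base - start)               # bases from the drawing start
    fragment = int(base / fragment_bases)  # which row the base falls on
    if fragment < 1:                       # clamp into [0, fragments - 1]
        fragment, base_offset = 0, base
    elif fragment >= fragments:
        fragment, base_offset = fragments - 1, fragment_bases
    else:
        base_offset = base % fragment_bases  # bases into this fragment
    x_offset = pagewidth * base_offset / fragment_bases
    return fragment, x_offset

print(canvas_location(625))  # (2, 300.0): third row, halfway across
```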
+ """ + if strand == 1: + y1 = center + y2 = top + teeth = 2 + elif strand == -1: + y1 = bottom + y2 = center + teeth = 2 + else: + y1 = bottom + y2 = top + teeth = 4 + + xmin = min(x1, x2) + xmax = max(x1, x2) + height = y2 - y1 + boxwidth = x2 - x1 + tooth_length = min(height / teeth, boxwidth * 0.5) + + headlength = tooth_length + taillength = tooth_length + + strokecolor, color = _stroke_and_fill_colors(color, border) + + points = [] + for i in range(teeth): + points.extend( + ( + xmin, + y1 + i * height / teeth, + xmin + taillength, + y1 + (i + 1) * height / teeth, + ) + ) + for i in range(teeth): + points.extend( + ( + xmax, + y1 + (teeth - i) * height / teeth, + xmax - headlength, + y1 + (teeth - i - 1) * height / teeth, + ) + ) + + return Polygon( + deduplicate(points), + strokeColor=strokecolor, + strokeWidth=1, + strokeLineJoin=1, # 1=round + fillColor=color, + **kwargs + ) + + def _draw_sigil_arrow(self, bottom, center, top, x1, x2, strand, **kwargs): + """Draw ARROW sigil (PRIVATE).""" + if strand == 1: + y1 = center + y2 = top + orientation = "right" + elif strand == -1: + y1 = bottom + y2 = center + orientation = "left" + else: + y1 = bottom + y2 = top + orientation = "right" # backward compatibility + return draw_arrow((x1, y1), (x2, y2), orientation=orientation, **kwargs) + + def _draw_sigil_big_arrow(self, bottom, center, top, x1, x2, strand, **kwargs): + """Draw BIGARROW sigil, like ARROW but straddles the axis (PRIVATE).""" + if strand == -1: + orientation = "left" + else: + orientation = "right" + return draw_arrow((x1, bottom), (x2, top), orientation=orientation, **kwargs) diff --git a/code/lib/Bio/Graphics/GenomeDiagram/_Track.py b/code/lib/Bio/Graphics/GenomeDiagram/_Track.py new file mode 100644 index 0000000..a6c67f9 --- /dev/null +++ b/code/lib/Bio/Graphics/GenomeDiagram/_Track.py @@ -0,0 +1,285 @@ +# Copyright 2003-2008 by Leighton Pritchard. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +# +# Contact: Leighton Pritchard, The James Hutton Institute, +# Invergowrie, Dundee, Scotland, DD2 5DA, UK +# Leighton.Pritchard@hutton.ac.uk +################################################################################ + +"""Track module. + +Provides: + - Track - Container for a single track on the diagram, containing + FeatureSet and GraphSet objects + +For drawing capabilities, this module uses reportlab to draw and write +the diagram: http://www.reportlab.com +""" + + +from reportlab.lib import colors + +# GenomeDiagram imports +from ._FeatureSet import FeatureSet +from ._GraphSet import GraphSet + +_grey = colors.Color(0.6, 0.6, 0.6) + + +class Track: + """Track. + + Attributes: + - height Int describing the relative height to other trackscale_fontsizes + in the diagram + - name String describing the track + - hide Boolean, 0 if the track is not to be drawn + - start, end Integers (or None) specifying start/end to draw just + a partial track. 
+ - greytrack Boolean, 1 if a grey background to the track is to be + drawn + - greytrack_labels Int describing how many track-identifying labels + should be placed on the track at regular intervals + - greytrack_font String describing the font to use for the greytrack + labels + - greytrack_fontsize Int describing the font size to display the + labels on the grey track + - greytrack_font_rotation Int describing the angle through which to + rotate the grey track labels (Linear only) + - greytrack_font_color colors.Color describing the color to draw + the grey track labels + - scale Boolean, 1 if a scale is to be drawn on the track + - scale_format String, defaults to None, when scale values are written + as numerals. Setting this to 'SInt' invokes SI + unit-like multiples, such as Mbp, Kbp and so on. + - scale_color colors.Color to draw the elements of the scale + - scale_font String describing the font to use for the scale labels + - scale_fontsize Int describing the size of the scale label font + - scale_fontangle Int describing the angle at which to draw the scale + labels (linear only) + - scale_ticks Boolean, 1 if ticks should be drawn at all on the + scale + - scale_largeticks Float (0->1) describing the height of large + scale ticks relative to the track height. + - scale_smallticks Float (0->1) describing the height of small + scale ticks relative to the track height. + - scale_largetick_interval Int, describing the number of bases that + should separate large ticks + - scale_smalltick_interval Int, describing the number of bases that + should separate small ticks + - scale_largetick_labels Boolean describing whether position labels + should be written over large ticks + - scale_smalltick_labels Boolean describing whether position labels + should be written over small ticks + - axis_labels Boolean describing whether the value labels should + be placed on the Y axes + + """ + + def __init__( + self, + name=None, + height=1, + hide=0, + greytrack=0, + greytrack_labels=5, + greytrack_fontsize=8, + greytrack_font="Helvetica", + greytrack_font_rotation=0, + greytrack_font_color=_grey, + scale=1, + scale_format=None, + scale_color=colors.black, + scale_font="Helvetica", + scale_fontsize=6, + scale_fontangle=45, + scale_largeticks=0.5, + scale_ticks=1, + scale_smallticks=0.3, + scale_largetick_interval=1e6, + scale_smalltick_interval=1e4, + scale_largetick_labels=1, + scale_smalltick_labels=0, + axis_labels=1, + start=None, + end=None, + greytrack_font_colour=None, + scale_colour=None, + ): + """Initialize. + + Arguments: + - height Int describing the relative height to other tracks in the + diagram + - name String describing the track + - hide Boolean, 0 if the track is not to be drawn + - greytrack Boolean, 1 if a grey background to the track is to be + drawn + - greytrack_labels Int describing how many track-identifying labels + should be placed on the track at regular intervals + - greytrack_font String describing the font to use for the greytrack + labels + - greytrack_fontsize Int describing the font size to display the + labels on the grey track + - greytrack_font_rotation Int describing the angle through which to + rotate the grey track labels (Linear only) + - greytrack_font_color colors.Color describing the color to draw + the grey track labels (overridden by backwards compatible argument + with UK spelling, colour).
+ - scale Boolean, 1 if a scale is to be drawn on the track + - scale_color colors.Color to draw the elements of the scale + (overridden by backwards compatible argument with UK + spelling, colour). + - scale_font String describing the font to use for the scale labels + - scale_fontsize Int describing the size of the scale label font + - scale_fontangle Int describing the angle at which to draw the scale + labels (linear only) + - scale_ticks Boolean, 1 if ticks should be drawn at all on the + scale + - scale_largeticks Float (0->1) describing the height of large + scale ticks relative to the track height. + - scale_smallticks Float (0->1) describing the height of small + scale ticks relative to the track height. + - scale_largetick_interval Int, describing the number of bases that + should separate large ticks + - scale_smalltick_interval Int, describing the number of bases that + should separate small ticks + - scale_largetick_labels Boolean describing whether position labels + should be written over large ticks + - scale_smalltick_labels Boolean describing whether position labels + should be written over small ticks + - name String to help identify the track + - height Relative height to draw the track + - axis_labels Boolean describing whether the value labels should + be placed on the Y axes + + """ + # Let the UK spelling (colour) override the USA spelling (color) + if greytrack_font_colour is not None: + greytrack_font_color = greytrack_font_colour + if scale_colour is not None: + scale_color = scale_colour + + self._next_id = 0 # This will count sets as they are added to the track + self._sets = {} # Holds sets, keyed by unique ID + + # Assign attribute values from instantiation + self.height = height + if name is not None: + self.name = str(name) + else: + self.name = "Track" + self.hide = hide + self.start = start + self.end = end + + # Attributes for the grey track background and labels + self.greytrack = greytrack + self.greytrack_labels = greytrack_labels + self.greytrack_fontsize = greytrack_fontsize + self.greytrack_font = greytrack_font + self.greytrack_font_rotation = greytrack_font_rotation + self.greytrack_fontcolor = greytrack_font_color + + # Attributes for the track scale + self.scale = scale + self.scale_format = scale_format + self.scale_color = scale_color + self.scale_font = scale_font + self.scale_fontsize = scale_fontsize + self.scale_fontangle = scale_fontangle + self.scale_ticks = scale_ticks + self.scale_largeticks = scale_largeticks + self.scale_smallticks = scale_smallticks + self.scale_largetick_interval = scale_largetick_interval + self.scale_smalltick_interval = scale_smalltick_interval + self.scale_largetick_labels = scale_largetick_labels + self.scale_smalltick_labels = scale_smalltick_labels + self.axis_labels = axis_labels + + def add_set(self, set): + """Add a preexisting FeatureSet or GraphSet object to the track.""" + set.id = self._next_id # Assign unique id to set + set.parent = self # Make set's parent this track + self._sets[self._next_id] = set # Add set, keyed by unique id + self._next_id += 1 # Increment unique set ids + + def new_set(self, type="feature", **args): + """Create a new FeatureSet or GraphSet object.
+ + Create a new FeatureSet or GraphSet object, add it to the + track, and return for user manipulation + """ + type_dict = {"feature": FeatureSet, "graph": GraphSet} + set = type_dict[type]() + for key in args: + setattr(set, key, args[key]) + set.id = self._next_id # Assign unique id to set + set.parent = self # Make set's parent this track + self._sets[self._next_id] = set # Add set, keyed by unique id + self._next_id += 1 # Increment unique set ids + return set + + def del_set(self, set_id): + """Remove the set with the passed id from the track.""" + del self._sets[set_id] + + def get_sets(self): + """Return the sets contained in this track.""" + return list(self._sets.values()) + + def get_ids(self): + """Return the ids of all sets contained in this track.""" + return list(self._sets.keys()) + + def range(self): + """Return the lowest and highest base (or mark) numbers as a tuple.""" + lows, highs = [], [] # Holds set of low and high values from sets + if self.start is not None: + lows.append(self.start) + if self.end is not None: + highs.append(self.end) + for set in self._sets.values(): + low, high = set.range() # Get each set range + lows.append(low) + highs.append(high) + if lows: + low = min(lows) + else: + low = None + if highs: + high = max(highs) + else: + high = None + return low, high # Return lowest and highest values + + def to_string(self, verbose=0): + """Return a formatted string with information about the track. + + Arguments: + - verbose - Boolean indicating whether a short or complete + account of the track is required + + """ + if not verbose: # Return the short description + return "%s" % self # Use __str__ method instead + else: # Return the long description + outstr = ["\n<%s: %s>" % (self.__class__, self.name)] + outstr.append("%d sets" % len(self._sets)) + for key in self._sets: + outstr.append("set: %s" % self._sets[key]) + return "\n".join(outstr) + + def __getitem__(self, key): + """Return the set with the passed id.""" + return self._sets[key] + + def __str__(self): + """Return a formatted string with information about the Track.""" + outstr = ["\n<%s: %s>" % (self.__class__, self.name)] + outstr.append("%d sets" % len(self._sets)) + return "\n".join(outstr) diff --git a/code/lib/Bio/Graphics/GenomeDiagram/__init__.py b/code/lib/Bio/Graphics/GenomeDiagram/__init__.py new file mode 100644 index 0000000..ca40d28 --- /dev/null +++ b/code/lib/Bio/Graphics/GenomeDiagram/__init__.py @@ -0,0 +1,37 @@ +# Copyright 2003-2008 by Leighton Pritchard. All rights reserved. +# Revisions copyright 2009 by Peter Cock. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. 
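With the Track container now fully defined, a short usage sketch may help; this assumes the vendored `Bio.Graphics.GenomeDiagram` package added in this diff is importable:

```python
# Sketch: building a Track and attaching feature sets.
from Bio.Graphics.GenomeDiagram import Track, FeatureSet

track = Track(name="CDS features", greytrack=1)
feature_set = track.new_set("feature")  # create and attach a new FeatureSet
track.add_set(FeatureSet())             # or attach a pre-built set
print(track.get_ids())                  # [0, 1] - ids assigned in order
print(track.to_string(verbose=1))       # long description listing both sets
```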
+# +# Contact: Leighton Pritchard, The James Hutton Institute, +# Invergowrie, Dundee, Scotland, DD2 5DA, UK +# Leighton.Pritchard@hutton.ac.uk +# ############################################################################# + +"""GenomeDiagram module integrated into Biopython.""" + +# Local imports, to make these classes available directly under the +# Bio.Graphics.GenomeDiagram namespace: + +from ._Diagram import Diagram +from ._Track import Track +from ._FeatureSet import FeatureSet +from ._GraphSet import GraphSet +from ._CrossLink import CrossLink +from ._Colors import ColorTranslator +from ._Feature import Feature +from ._Graph import GraphData + +__all__ = ( + "Diagram", + "Track", + "FeatureSet", + "Feature", + "GraphSet", + "GraphData", + "CrossLink", + "ColorTranslator", +) diff --git a/code/lib/Bio/Graphics/GenomeDiagram/__pycache__/_AbstractDrawer.cpython-37.pyc b/code/lib/Bio/Graphics/GenomeDiagram/__pycache__/_AbstractDrawer.cpython-37.pyc new file mode 100644 index 0000000..b0e8b84 Binary files /dev/null and b/code/lib/Bio/Graphics/GenomeDiagram/__pycache__/_AbstractDrawer.cpython-37.pyc differ diff --git a/code/lib/Bio/Graphics/GenomeDiagram/__pycache__/_CircularDrawer.cpython-37.pyc b/code/lib/Bio/Graphics/GenomeDiagram/__pycache__/_CircularDrawer.cpython-37.pyc new file mode 100644 index 0000000..e580415 Binary files /dev/null and b/code/lib/Bio/Graphics/GenomeDiagram/__pycache__/_CircularDrawer.cpython-37.pyc differ diff --git a/code/lib/Bio/Graphics/GenomeDiagram/__pycache__/_Colors.cpython-37.pyc b/code/lib/Bio/Graphics/GenomeDiagram/__pycache__/_Colors.cpython-37.pyc new file mode 100644 index 0000000..a8989e5 Binary files /dev/null and b/code/lib/Bio/Graphics/GenomeDiagram/__pycache__/_Colors.cpython-37.pyc differ diff --git a/code/lib/Bio/Graphics/GenomeDiagram/__pycache__/_CrossLink.cpython-37.pyc b/code/lib/Bio/Graphics/GenomeDiagram/__pycache__/_CrossLink.cpython-37.pyc new file mode 100644 index 0000000..2b61193 Binary files /dev/null and b/code/lib/Bio/Graphics/GenomeDiagram/__pycache__/_CrossLink.cpython-37.pyc differ diff --git a/code/lib/Bio/Graphics/GenomeDiagram/__pycache__/_Diagram.cpython-37.pyc b/code/lib/Bio/Graphics/GenomeDiagram/__pycache__/_Diagram.cpython-37.pyc new file mode 100644 index 0000000..9ae04f2 Binary files /dev/null and b/code/lib/Bio/Graphics/GenomeDiagram/__pycache__/_Diagram.cpython-37.pyc differ diff --git a/code/lib/Bio/Graphics/GenomeDiagram/__pycache__/_Feature.cpython-37.pyc b/code/lib/Bio/Graphics/GenomeDiagram/__pycache__/_Feature.cpython-37.pyc new file mode 100644 index 0000000..4dab43a Binary files /dev/null and b/code/lib/Bio/Graphics/GenomeDiagram/__pycache__/_Feature.cpython-37.pyc differ diff --git a/code/lib/Bio/Graphics/GenomeDiagram/__pycache__/_FeatureSet.cpython-37.pyc b/code/lib/Bio/Graphics/GenomeDiagram/__pycache__/_FeatureSet.cpython-37.pyc new file mode 100644 index 0000000..ac57f08 Binary files /dev/null and b/code/lib/Bio/Graphics/GenomeDiagram/__pycache__/_FeatureSet.cpython-37.pyc differ diff --git a/code/lib/Bio/Graphics/GenomeDiagram/__pycache__/_Graph.cpython-37.pyc b/code/lib/Bio/Graphics/GenomeDiagram/__pycache__/_Graph.cpython-37.pyc new file mode 100644 index 0000000..3aecb02 Binary files /dev/null and b/code/lib/Bio/Graphics/GenomeDiagram/__pycache__/_Graph.cpython-37.pyc differ diff --git a/code/lib/Bio/Graphics/GenomeDiagram/__pycache__/_GraphSet.cpython-37.pyc b/code/lib/Bio/Graphics/GenomeDiagram/__pycache__/_GraphSet.cpython-37.pyc new file mode 100644 index 0000000..e9a7dc7 Binary files 
/dev/null and b/code/lib/Bio/Graphics/GenomeDiagram/__pycache__/_GraphSet.cpython-37.pyc differ diff --git a/code/lib/Bio/Graphics/GenomeDiagram/__pycache__/_LinearDrawer.cpython-37.pyc b/code/lib/Bio/Graphics/GenomeDiagram/__pycache__/_LinearDrawer.cpython-37.pyc new file mode 100644 index 0000000..bd7db74 Binary files /dev/null and b/code/lib/Bio/Graphics/GenomeDiagram/__pycache__/_LinearDrawer.cpython-37.pyc differ diff --git a/code/lib/Bio/Graphics/GenomeDiagram/__pycache__/_Track.cpython-37.pyc b/code/lib/Bio/Graphics/GenomeDiagram/__pycache__/_Track.cpython-37.pyc new file mode 100644 index 0000000..959ca99 Binary files /dev/null and b/code/lib/Bio/Graphics/GenomeDiagram/__pycache__/_Track.cpython-37.pyc differ diff --git a/code/lib/Bio/Graphics/GenomeDiagram/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/Graphics/GenomeDiagram/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..6f04517 Binary files /dev/null and b/code/lib/Bio/Graphics/GenomeDiagram/__pycache__/__init__.cpython-37.pyc differ diff --git a/code/lib/Bio/Graphics/KGML_vis.py b/code/lib/Bio/Graphics/KGML_vis.py new file mode 100644 index 0000000..9a09086 --- /dev/null +++ b/code/lib/Bio/Graphics/KGML_vis.py @@ -0,0 +1,443 @@ +# Copyright 2013 Leighton Pritchard. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +"""Classes and functions to visualise a KGML Pathway Map. + +The KGML definition is as of release KGML v0.7.1 +(http://www.kegg.jp/kegg/xml/docs/) + +Classes: +""" + + +import os +import tempfile +from io import BytesIO + +try: + from reportlab.lib import colors + from reportlab.pdfgen import canvas +except ImportError: + from Bio import MissingPythonDependencyError + + raise MissingPythonDependencyError( + "Install reportlab if you want to use KGML_vis." + ) from None + +try: + from PIL import Image +except ImportError: + from Bio import MissingPythonDependencyError + + raise MissingPythonDependencyError( + "Install pillow if you want to use KGML_vis." + ) from None + +from urllib.request import urlopen + +from Bio.KEGG.KGML.KGML_pathway import Pathway + + +def darken(color, factor=0.7): + """Return darkened color as a ReportLab RGB color. + + Take a passed color and returns a Reportlab color that is darker by the + factor indicated in the parameter. + """ + newcol = color_to_reportlab(color) + for a in ["red", "green", "blue"]: + setattr(newcol, a, factor * getattr(newcol, a)) + return newcol + + +def color_to_reportlab(color): + """Return the passed color in Reportlab Color format. + + We allow colors to be specified as hex values, tuples, or Reportlab Color + objects, and with or without an alpha channel. This function acts as a + Rosetta stone for conversion of those formats to a Reportlab Color + object, with alpha value. 
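The three input flavours this conversion accepts can be exercised directly; a small sketch (the hex and tuple values are arbitrary examples), assuming the vendored `Bio.Graphics.KGML_vis` module is importable:

```python
# Sketch: the inputs color_to_reportlab() converts to reportlab Colors.
from reportlab.lib import colors
from Bio.Graphics.KGML_vis import color_to_reportlab

print(color_to_reportlab(colors.red))            # Color object passes through
print(color_to_reportlab("#80CC33"))             # 7-char hex -> HexColor
print(color_to_reportlab("#80CC3380"))           # 9-char hex -> with alpha
print(color_to_reportlab((0.5, 0.8, 0.2, 0.5)))  # RGB(A) tuple -> Color
```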
+ + Any other color specification is returned directly + """ + # Reportlab Color objects are in the format we want already + if isinstance(color, colors.Color): + return color + elif isinstance(color, str): # String implies hex color + if color.startswith("0x"): # Standardise to octothorpe + color = color.replace("0x", "#") + if len(color) == 7: + return colors.HexColor(color) + else: + try: + return colors.HexColor(color, hasAlpha=True) + except TypeError: # Catch pre-2.7 Reportlab + raise RuntimeError( + "Your reportlab seems to be too old, try 2.7 onwards" + ) from None + elif isinstance(color, tuple): # Tuple implies RGB(alpha) tuple + return colors.Color(*color) + return color + + +def get_temp_imagefilename(url): + """Return filename of temporary file containing downloaded image. + + Create a new temporary file to hold the image file at the passed URL + and return the filename. + """ + img = urlopen(url).read() + im = Image.open(BytesIO(img)) + # im.transpose(Image.FLIP_TOP_BOTTOM) + f = tempfile.NamedTemporaryFile(delete=False, suffix=".png") + fname = f.name + f.close() + im.save(fname, "PNG") + return fname + + +class KGMLCanvas: + """Reportlab Canvas-based representation of a KGML pathway map.""" + + def __init__( + self, + pathway, + import_imagemap=False, + label_compounds=True, + label_orthologs=True, + label_reaction_entries=True, + label_maps=True, + show_maps=False, + fontname="Helvetica", + fontsize=6, + draw_relations=True, + show_orthologs=True, + show_compounds=True, + show_genes=True, + show_reaction_entries=True, + margins=(0.02, 0.02), + ): + """Initialize the class.""" + self.pathway = pathway + self.show_maps = show_maps + self.show_orthologs = show_orthologs + self.show_compounds = show_compounds + self.show_genes = show_genes + self.show_reaction_entries = show_reaction_entries + self.label_compounds = label_compounds + self.label_orthologs = label_orthologs + self.label_reaction_entries = label_reaction_entries + self.label_maps = label_maps + self.fontname = fontname + self.fontsize = fontsize + self.draw_relations = draw_relations + self.non_reactant_transparency = 0.3 + self.import_imagemap = import_imagemap # Import the map .png from URL + # percentage of canvas that will be margin on either side in the + # X and Y directions + self.margins = margins + + def draw(self, filename): + """Add the map elements to the drawing.""" + # Instantiate the drawing, first + # size x_max, y_max for now - we can add margins, later + if self.import_imagemap: + # We're drawing directly on the image, so we set the canvas to the + # same size as the image + if os.path.isfile(self.pathway.image): + imfilename = self.pathway.image + else: + imfilename = get_temp_imagefilename(self.pathway.image) + im = Image.open(imfilename) + cwidth, cheight = im.size + else: + # No image, so we set the canvas size to accommodate visible + # elements + cwidth, cheight = (self.pathway.bounds[1][0], self.pathway.bounds[1][1]) + # Instantiate canvas + self.drawing = canvas.Canvas( + filename, + bottomup=0, + pagesize=( + cwidth * (1 + 2 * self.margins[0]), + cheight * (1 + 2 * self.margins[1]), + ), + ) + self.drawing.setFont(self.fontname, self.fontsize) + # Transform the canvas to add the margins + self.drawing.translate( + self.margins[0] * self.pathway.bounds[1][0], + self.margins[1] * self.pathway.bounds[1][1], + ) + # Add the map image, if required + if self.import_imagemap: + self.drawing.saveState() + self.drawing.scale(1, -1) + self.drawing.translate(0, -cheight) +
self.drawing.drawImage(imfilename, 0, 0) + self.drawing.restoreState() + # Add the reactions, compounds and maps + # Maps go on first, to be overlaid by more information. + # By default, they're slightly transparent. + if self.show_maps: + self.__add_maps() + if self.show_reaction_entries: + self.__add_reaction_entries() + if self.show_orthologs: + self.__add_orthologs() + if self.show_compounds: + self.__add_compounds() + if self.show_genes: + self.__add_genes() + # TODO: complete draw_relations code + # if self.draw_relations: + # self.__add_relations() + # Write the pathway map to PDF + self.drawing.save() + + def __add_maps(self): + """Add maps to the drawing of the map (PRIVATE). + + We do this first, as they're regional labels to be overlaid by + information. Also, we want to set the color to something subtle. + + We're using Hex colors because that's what KGML uses, and + Reportlab doesn't mind. + """ + for m in self.pathway.maps: + for g in m.graphics: + self.drawing.setStrokeColor("#888888") + self.drawing.setFillColor("#DDDDDD") + self.__add_graphics(g) + if self.label_maps: + self.drawing.setFillColor("#888888") + self.__add_labels(g) + + def __add_graphics(self, graphics): + """Add the passed graphics object to the map (PRIVATE). + + Add text, add after the graphics object, for sane Z-ordering. + """ + if graphics.type == "line": + p = self.drawing.beginPath() + x, y = graphics.coords[0] + # There are optional settings for lines that aren't necessarily + # part of the KGML DTD + if graphics.width is not None: + self.drawing.setLineWidth(graphics.width) + else: + self.drawing.setLineWidth(1) + p.moveTo(x, y) + for (x, y) in graphics.coords: + p.lineTo(x, y) + self.drawing.drawPath(p) + self.drawing.setLineWidth(1) # Return to default + # KGML defines the (x, y) coordinates as the centre of the circle/ + # rectangle/roundrectangle, but Reportlab uses the co-ordinates of the + # lower-left corner for rectangles. + if graphics.type == "circle": + self.drawing.circle( + graphics.x, graphics.y, graphics.width * 0.5, stroke=1, fill=1 + ) + elif graphics.type == "roundrectangle": + self.drawing.roundRect( + graphics.x - graphics.width * 0.5, + graphics.y - graphics.height * 0.5, + graphics.width, + graphics.height, + min(graphics.width, graphics.height) * 0.1, + stroke=1, + fill=1, + ) + elif graphics.type == "rectangle": + self.drawing.rect( + graphics.x - graphics.width * 0.5, + graphics.y - graphics.height * 0.5, + graphics.width, + graphics.height, + stroke=1, + fill=1, + ) + + def __add_labels(self, graphics): + """Add labels for the passed graphics objects to the map (PRIVATE). + + We don't check that the labels fit inside objects such as circles/ + rectangles/roundrectangles. + """ + if graphics.type == "line": + # We use the midpoint of the line - sort of - we take the median + # line segment (list-wise, not in terms of length), and use the + # midpoint of that line. We could have other options here, + # maybe even parameterising it to a proportion of the total line + # length. + mid_idx = len(graphics.coords) * 0.5 + if not int(mid_idx) == mid_idx: + idx1, idx2 = int(mid_idx - 0.5), int(mid_idx + 0.5) + else: + idx1, idx2 = int(mid_idx - 1), int(mid_idx) + x1, y1 = graphics.coords[idx1] + x2, y2 = graphics.coords[idx2] + x, y = 0.5 * (x1 + x2), 0.5 * (y1 + y2) + elif graphics.type == "circle": + x, y = graphics.x, graphics.y + elif graphics.type in ("rectangle", "roundrectangle"): + x, y = graphics.x, graphics.y + # How big do we want the text, and how many characters?
+ if graphics._parent.type == "map": + text = graphics.name + self.drawing.setFont(self.fontname, self.fontsize + 2) + elif len(graphics.name) < 15: + text = graphics.name + else: + text = graphics.name[:12] + "..." + self.drawing.drawCentredString(x, y, text) + self.drawing.setFont(self.fontname, self.fontsize) + + def __add_orthologs(self): + """Add 'ortholog' Entry elements to the drawing of the map (PRIVATE). + + In KGML, these are typically line objects, so we render them + before the compound circles to cover the unsightly ends/junctions. + """ + for ortholog in self.pathway.orthologs: + for g in ortholog.graphics: + self.drawing.setStrokeColor(color_to_reportlab(g.fgcolor)) + self.drawing.setFillColor(color_to_reportlab(g.bgcolor)) + self.__add_graphics(g) + if self.label_orthologs: + # We want the label color to be slightly darker + # (where possible), so it can be read + self.drawing.setFillColor(darken(g.fgcolor)) + self.__add_labels(g) + + def __add_reaction_entries(self): + """Add Entry elements for Reactions to the map drawing (PRIVATE). + + In KGML, these are typically line objects, so we render them + before the compound circles to cover the unsightly ends/junctions + """ + for reaction in self.pathway.reaction_entries: + for g in reaction.graphics: + self.drawing.setStrokeColor(color_to_reportlab(g.fgcolor)) + self.drawing.setFillColor(color_to_reportlab(g.bgcolor)) + self.__add_graphics(g) + if self.label_reaction_entries: + # We want the label color to be slightly darker + # (where possible), so it can be read + self.drawing.setFillColor(darken(g.fgcolor)) + self.__add_labels(g) + + def __add_compounds(self): + """Add compound elements to the drawing of the map (PRIVATE).""" + for compound in self.pathway.compounds: + for g in compound.graphics: + # Modify transparency of compounds that don't participate + # in reactions + fillcolor = color_to_reportlab(g.bgcolor) + if not compound.is_reactant: + fillcolor.alpha *= self.non_reactant_transparency + self.drawing.setStrokeColor(color_to_reportlab(g.fgcolor)) + self.drawing.setFillColor(fillcolor) + self.__add_graphics(g) + if self.label_compounds: + if not compound.is_reactant: + t = 0.3 + else: + t = 1 + self.drawing.setFillColor(colors.Color(0.2, 0.2, 0.2, t)) + self.__add_labels(g) + + def __add_genes(self): + """Add gene elements to the drawing of the map (PRIVATE).""" + for gene in self.pathway.genes: + for g in gene.graphics: + self.drawing.setStrokeColor(color_to_reportlab(g.fgcolor)) + self.drawing.setFillColor(color_to_reportlab(g.bgcolor)) + self.__add_graphics(g) + if self.label_compounds: + self.drawing.setFillColor(darken(g.fgcolor)) + self.__add_labels(g) + + def __add_relations(self): + """Add relations to the map (PRIVATE). + + This is tricky. There is no defined graphic in KGML for a + relation, and the corresponding entries are typically defined + as objects 'to be connected somehow'. KEGG uses KegSketch, which + is not public, and most third-party software draws straight line + arrows, with heads to indicate the appropriate direction + (at both ends for reversible reactions), using solid lines for + ECrel relation types, and dashed lines for maplink relation types. + + The relation has: + - entry1: 'from' node + - entry2: 'to' node + - subtype: what the relation refers to + + Typically we have entry1 = map/ortholog; entry2 = map/ortholog, + subtype = compound. 
+ """ + # Dashed lines for maplinks, solid for everything else + for relation in list(self.pathway.relations): + if relation.type == "maplink": + self.drawing.setDash(6, 3) + else: + self.drawing.setDash() + for s in relation.subtypes: + subtype = self.pathway.entries[s[1]] + # Our aim is to draw an arrow from the entry1 object to the + # entry2 object, via the subtype object. + # 1) Entry 1 to subtype + self.__draw_arrow(relation.entry1, subtype) + # 2) subtype to Entry 2 + self.__draw_arrow(subtype, relation.entry2) + + def __draw_arrow(self, g_from, g_to): + """Draw an arrow between given Entry objects (PRIVATE). + + Draws an arrow from the g_from Entry object to the g_to + Entry object; both must have Graphics objects. + """ + # Centre and bound co-ordinates for the from and two objects + bounds_from, bounds_to = g_from.bounds, g_to.bounds + centre_from = ( + 0.5 * (bounds_from[0][0] + bounds_from[1][0]), + 0.5 * (bounds_from[0][1] + bounds_from[1][1]), + ) + centre_to = ( + 0.5 * (bounds_to[0][0] + bounds_to[1][0]), + 0.5 * (bounds_to[0][1] + bounds_to[1][1]), + ) + p = self.drawing.beginPath() + # print(True, g_from.name, g_to.name, bounds_to, bounds_from) + # If the 'from' and 'to' graphics are vertically-aligned, draw a line + # from the 'from' to the 'to' entity + if bounds_to[0][0] < centre_from[0] < bounds_to[1][0]: + # print(True, g_from.name, g_to.name, bounds_to, bounds_from) + if centre_to[1] > centre_from[1]: # to above from + p.moveTo(centre_from[0], bounds_from[1][1]) + p.lineTo(centre_from[0], bounds_to[0][1]) + # Draw arrow point - TODO + else: # to below from + p.moveTo(centre_from[0], bounds_from[0][1]) + p.lineTo(centre_from[0], bounds_to[1][1]) + # Draw arrow point - TODO + elif bounds_from[0][0] < centre_to[0] < bounds_from[1][0]: + # print(True, g_from.name, g_to.name, bounds_to, bounds_from) + if centre_to[1] > centre_from[1]: # to above from + p.moveTo(centre_to[0], bounds_from[1][1]) + p.lineTo(centre_to[0], bounds_to[0][1]) + # Draw arrow point - TODO + else: # to below from + p.moveTo(centre_to[0], bounds_from[0][1]) + p.lineTo(centre_to[0], bounds_to[1][1]) + # Draw arrow point - TODO + self.drawing.drawPath(p) # Draw arrow shaft + # print(g_from) + # print(bounds_from) + # print(g_to) + # print(bounds_to) diff --git a/code/lib/Bio/Graphics/__init__.py b/code/lib/Bio/Graphics/__init__.py new file mode 100644 index 0000000..8720bb4 --- /dev/null +++ b/code/lib/Bio/Graphics/__init__.py @@ -0,0 +1,90 @@ +# Copyright 2008 by Brad Chapman. All rights reserved. +# Copyright 2008 by Michiel de Hoon. All rights reserved. +# Copyright 2009-2017 by Peter Cock. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +"""Bio.Graphics offers several graphical outputs, all using ReportLab.""" + +# Check if ReportLab is installed. +try: + import reportlab as r + + del r +except ImportError: + from Bio import MissingPythonDependencyError + + raise MissingPythonDependencyError( + "Please install ReportLab if you want " + "to use Bio.Graphics. You can find ReportLab at " + "http://www.reportlab.com/software/opensource/" + ) from None + + +# The following code is to allow all the Bio.Graphics +# code to deal with the different ReportLab renderers +# and the API quirks consistently. 
+ + +def _write(drawing, output_file, format, dpi=72): + """Standardize output to files (PRIVATE). + + Writes the provided drawing out to a file in a prescribed format. + + - drawing - suitable ReportLab drawing object. + - output_file - a handle to write to, or a filename to write to. + - format - String indicating output format, one of PS, PDF, SVG, + or provided the ReportLab renderPM module is installed, + one of the bitmap formats JPG, BMP, GIF, PNG, TIFF or TIF. + The format can be given in any case. + - dpi - Resolution (dots per inch) for bitmap formats. + + No return value. + """ + from reportlab.graphics import renderPS, renderPDF, renderSVG + + try: + from reportlab.graphics import renderPM + except ImportError: + # This is an optional part of ReportLab, so may not be installed. + # We'll raise a missing dependency error if rendering to a + # bitmap format is attempted. + renderPM = None + + formatdict = { + "PS": renderPS, + "EPS": renderPS, + # not sure which you actually get, PS or EPS, but + # GenomeDiagram used PS while other modules used EPS. + "PDF": renderPDF, + "SVG": renderSVG, + "JPG": renderPM, + "BMP": renderPM, + "GIF": renderPM, + "PNG": renderPM, + "TIFF": renderPM, + "TIF": renderPM, + } + try: + # If output is not a string, then .upper() will trigger + # an attribute error... + drawmethod = formatdict[format.upper()] # select drawing method + except (KeyError, AttributeError): + raise ValueError( + "Output format should be one of %s" % ", ".join(formatdict) + ) from None + + if drawmethod is None: + # i.e. We wanted renderPM but it isn't installed + # See the import at the top of the function. + from Bio import MissingPythonDependencyError + + raise MissingPythonDependencyError("Please install ReportLab's renderPM module") + + if drawmethod == renderPM: + # This has a different API to the other render objects + return drawmethod.drawToFile(drawing, output_file, format, dpi=dpi) + else: + return drawmethod.drawToFile(drawing, output_file) diff --git a/code/lib/Bio/Graphics/__pycache__/BasicChromosome.cpython-37.pyc b/code/lib/Bio/Graphics/__pycache__/BasicChromosome.cpython-37.pyc new file mode 100644 index 0000000..366a8eb Binary files /dev/null and b/code/lib/Bio/Graphics/__pycache__/BasicChromosome.cpython-37.pyc differ diff --git a/code/lib/Bio/Graphics/__pycache__/ColorSpiral.cpython-37.pyc b/code/lib/Bio/Graphics/__pycache__/ColorSpiral.cpython-37.pyc new file mode 100644 index 0000000..fdef3ca Binary files /dev/null and b/code/lib/Bio/Graphics/__pycache__/ColorSpiral.cpython-37.pyc differ diff --git a/code/lib/Bio/Graphics/__pycache__/Comparative.cpython-37.pyc b/code/lib/Bio/Graphics/__pycache__/Comparative.cpython-37.pyc new file mode 100644 index 0000000..7a2e4da Binary files /dev/null and b/code/lib/Bio/Graphics/__pycache__/Comparative.cpython-37.pyc differ diff --git a/code/lib/Bio/Graphics/__pycache__/DisplayRepresentation.cpython-37.pyc b/code/lib/Bio/Graphics/__pycache__/DisplayRepresentation.cpython-37.pyc new file mode 100644 index 0000000..363b6b5 Binary files /dev/null and b/code/lib/Bio/Graphics/__pycache__/DisplayRepresentation.cpython-37.pyc differ diff --git a/code/lib/Bio/Graphics/__pycache__/Distribution.cpython-37.pyc b/code/lib/Bio/Graphics/__pycache__/Distribution.cpython-37.pyc new file mode 100644 index 0000000..aa31c44 Binary files /dev/null and b/code/lib/Bio/Graphics/__pycache__/Distribution.cpython-37.pyc differ diff --git a/code/lib/Bio/Graphics/__pycache__/KGML_vis.cpython-37.pyc
b/code/lib/Bio/Graphics/__pycache__/KGML_vis.cpython-37.pyc new file mode 100644 index 0000000..f086a54 Binary files /dev/null and b/code/lib/Bio/Graphics/__pycache__/KGML_vis.cpython-37.pyc differ diff --git a/code/lib/Bio/Graphics/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/Graphics/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..22ca765 Binary files /dev/null and b/code/lib/Bio/Graphics/__pycache__/__init__.cpython-37.pyc differ diff --git a/code/lib/Bio/HMM/DynamicProgramming.py b/code/lib/Bio/HMM/DynamicProgramming.py new file mode 100644 index 0000000..9f9b095 --- /dev/null +++ b/code/lib/Bio/HMM/DynamicProgramming.py @@ -0,0 +1,326 @@ +# Copyright 2001 Brad Chapman. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. + +"""Dynamic Programming algorithms for general usage. + +This module contains classes which implement Dynamic Programming +algorithms that can be used generally. +""" + + +class AbstractDPAlgorithms: + """An abstract class to calculate forward and backward probabilities. + + This class should not be instantiated directly, but should be used + through a derived class which implements proper scaling of variables. + + This class is just meant to encapsulate the basic forward and backward + algorithms, and allow derived classes to deal with the problems of + multiplying probabilities. + + Derived class of this must implement: + + - _forward_recursion -- Calculate the forward values in the recursion + using some kind of technique for preventing underflow errors. + - _backward_recursion -- Calculate the backward values in the recursion + step using some technique to prevent underflow errors. + + """ + + def __init__(self, markov_model, sequence): + """Initialize to calculate forward and backward probabilities. + + Arguments: + - markov_model -- The current Markov model we are working with. + - sequence -- A training sequence containing a set of emissions. + + """ + self._mm = markov_model + self._seq = sequence + + def _forward_recursion(self, cur_state, sequence_pos, forward_vars): + """Calculate the forward recursion value (PRIVATE).""" + raise NotImplementedError("Subclasses must implement") + + def forward_algorithm(self): + """Calculate sequence probability using the forward algorithm. + + This implements the forward algorithm, as described on p57-58 of + Durbin et al. + + Returns: + - A dictionary containing the forward variables. This has keys of the + form (state letter, position in the training sequence), and values + containing the calculated forward variable. + - The calculated probability of the sequence. + + """ + # all of the different letters that the state path can be in + state_letters = self._mm.state_alphabet + + # -- initialize the algorithm + # + # NOTE: My index numbers are one less than what is given in Durbin + # et al, since we are indexing the sequence going from 0 to + # (Length - 1) not 1 to Length, like in Durbin et al. + # + forward_var = {} + # f_{0}(0) = 1 + forward_var[(state_letters[0], -1)] = 1 + # f_{k}(0) = 0, for k > 0 + for k in range(1, len(state_letters)): + forward_var[(state_letters[k], -1)] = 0 + + # -- now do the recursion step + # loop over the training sequence + # Recursion step: (i = 1 .. 
L) + for i in range(len(self._seq.emissions)): + # now loop over the letters in the state path + for main_state in state_letters: + # calculate the forward value using the appropriate + # method to prevent underflow errors + forward_value = self._forward_recursion(main_state, i, forward_var) + + if forward_value is not None: + forward_var[(main_state, i)] = forward_value + + # -- termination step - calculate the probability of the sequence + first_state = state_letters[0] + seq_prob = 0 + + for state_item in state_letters: + # f_{k}(L) + forward_value = forward_var[(state_item, len(self._seq.emissions) - 1)] + # a_{k0} + transition_value = self._mm.transition_prob[(state_item, first_state)] + + seq_prob += forward_value * transition_value + + return forward_var, seq_prob + + def _backward_recursion(self, cur_state, sequence_pos, forward_vars): + """Calculate the backward recursion value (PRIVATE).""" + raise NotImplementedError("Subclasses must implement") + + def backward_algorithm(self): + """Calculate sequence probability using the backward algorithm. + + This implements the backward algorithm, as described on p58-59 of + Durbin et al. + + Returns: + - A dictionary containing the backwards variables. This has keys + of the form (state letter, position in the training sequence), + and values containing the calculated backward variable. + + """ + # all of the different letters that the state path can be in + state_letters = self._mm.state_alphabet + + # -- initialize the algorithm + # + # NOTE: My index numbers are one less than what is given in Durbin + # et al, since we are indexing the sequence going from 0 to + # (Length - 1) not 1 to Length, like in Durbin et al. + # + backward_var = {} + + first_letter = state_letters[0] + # b_{k}(L) = a_{k0} for all k + for state in state_letters: + backward_var[ + (state, len(self._seq.emissions) - 1) + ] = self._mm.transition_prob[(state, state_letters[0])] + + # -- recursion + # first loop over the training sequence backwards + # Recursion step: (i = L - 1 ... 1) + all_indexes = list(range(len(self._seq.emissions) - 1)) + all_indexes.reverse() + for i in all_indexes: + # now loop over the letters in the state path + for main_state in state_letters: + # calculate the backward value using the appropriate + # method to prevent underflow errors + backward_value = self._backward_recursion(main_state, i, backward_var) + + if backward_value is not None: + backward_var[(main_state, i)] = backward_value + + # skip the termination step to avoid recalculations -- you should + # get sequence probabilities using the forward algorithm + + return backward_var + + +class ScaledDPAlgorithms(AbstractDPAlgorithms): + """Implement forward and backward algorithms using a rescaling approach. + + This scales the f and b variables, so that they remain within a + manageable numerical interval during calculations. This approach is + described in Durbin et al. on p 78. + + This approach is a little more straightforward than log transformation + but may still give underflow errors for some types of models. In these + cases, the LogDPAlgorithms class should be used. + """ + + def __init__(self, markov_model, sequence): + """Initialize the scaled approach to calculating probabilities. + + Arguments: + - markov_model -- The current Markov model we are working with. + - sequence -- A TrainingSequence object that must have a + set of emissions to work with.
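To make the forward recursion concrete, here is a self-contained toy version over plain dicts (a two-state model with arbitrary numbers, independent of the class machinery above; the termination step here simply sums the final forward values rather than applying end-state transitions):

```python
# Sketch: forward recursion f_l(i) = e_l(x_i) * sum_k f_k(i-1) * a_kl
states = ("F", "L")  # e.g. fair / loaded die states
init = {"F": 0.5, "L": 0.5}
trans = {("F", "F"): 0.9, ("F", "L"): 0.1,
         ("L", "F"): 0.2, ("L", "L"): 0.8}
emit = {("F", "6"): 1 / 6, ("L", "6"): 0.5,
        ("F", "1"): 1 / 6, ("L", "1"): 0.1}

seq = ["6", "6", "1"]
f = {s: init[s] * emit[(s, seq[0])] for s in states}  # initialization
for x in seq[1:]:                                     # recursion
    f = {s: emit[(s, x)] * sum(f[k] * trans[(k, s)] for k in states)
         for s in states}
print(sum(f.values()))  # P(sequence), small but non-zero
```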
+ + """ + AbstractDPAlgorithms.__init__(self, markov_model, sequence) + + self._s_values = {} + + def _calculate_s_value(self, seq_pos, previous_vars): + """Calculate the next scaling variable for a sequence position (PRIVATE). + + This utilizes the approach of choosing s values such that the + sum of all of the scaled f values is equal to 1. + + Arguments: + - seq_pos -- The current position we are at in the sequence. + - previous_vars -- All of the forward or backward variables + calculated so far. + + Returns: + - The calculated scaling variable for the sequence item. + + """ + # all of the different letters the state can have + state_letters = self._mm.state_alphabet + + # loop over all of the possible states + s_value = 0 + for main_state in state_letters: + emission = self._mm.emission_prob[ + (main_state, self._seq.emissions[seq_pos]) + ] + + # now sum over all of the previous vars and transitions + trans_and_var_sum = 0 + for second_state in self._mm.transitions_from(main_state): + # the value of the previous f or b value + var_value = previous_vars[(second_state, seq_pos - 1)] + + # the transition probability + trans_value = self._mm.transition_prob[(second_state, main_state)] + + trans_and_var_sum += var_value * trans_value + + s_value += emission * trans_and_var_sum + + return s_value + + def _forward_recursion(self, cur_state, sequence_pos, forward_vars): + """Calculate the value of the forward recursion (PRIVATE). + + Arguments: + - cur_state -- The letter of the state we are calculating the + forward variable for. + - sequence_pos -- The position we are at in the training seq. + - forward_vars -- The current set of forward variables + + """ + # calculate the s value, if we haven't done so already (ie. during + # a previous forward or backward recursion) + if sequence_pos not in self._s_values: + self._s_values[sequence_pos] = self._calculate_s_value( + sequence_pos, forward_vars + ) + + # e_{l}(x_{i}) + seq_letter = self._seq.emissions[sequence_pos] + cur_emission_prob = self._mm.emission_prob[(cur_state, seq_letter)] + # divide by the scaling value + scale_emission_prob = float(cur_emission_prob) / float( + self._s_values[sequence_pos] + ) + + # loop over all of the possible states at the position + state_pos_sum = 0 + have_transition = 0 + for second_state in self._mm.transitions_from(cur_state): + have_transition = 1 + + # get the previous forward_var values + # f_{k}(i - 1) + prev_forward = forward_vars[(second_state, sequence_pos - 1)] + + # a_{kl} + cur_trans_prob = self._mm.transition_prob[(second_state, cur_state)] + state_pos_sum += prev_forward * cur_trans_prob + + # if we have the possibility of having a transition + # return the recursion value + if have_transition: + return scale_emission_prob * state_pos_sum + else: + return None + + def _backward_recursion(self, cur_state, sequence_pos, backward_vars): + """Calculate the value of the backward recursion (PRIVATE). + + Arguments: + - cur_state -- The letter of the state we are calculating the + forward variable for. + - sequence_pos -- The position we are at in the training seq. + - backward_vars -- The current set of backward variables + + """ + # calculate the s value, if we haven't done so already (ie. 
during + # a previous forward or backward recursion) + if sequence_pos not in self._s_values: + self._s_values[sequence_pos] = self._calculate_s_value( + sequence_pos, backward_vars + ) + + # loop over all of the possible states at the position + state_pos_sum = 0 + have_transition = 0 + for second_state in self._mm.transitions_from(cur_state): + have_transition = 1 + # e_{l}(x_{i + 1}) + seq_letter = self._seq.emissions[sequence_pos + 1] + cur_emission_prob = self._mm.emission_prob[(cur_state, seq_letter)] + + # get the previous backward_var value + # b_{l}(i + 1) + prev_backward = backward_vars[(second_state, sequence_pos + 1)] + + # the transition probability -- a_{kl} + cur_transition_prob = self._mm.transition_prob[(cur_state, second_state)] + + state_pos_sum += cur_emission_prob * prev_backward * cur_transition_prob + + # if we have a probability for a transition, return it + if have_transition: + return state_pos_sum / float(self._s_values[sequence_pos]) + # otherwise we have no probability (ie. we can't do this transition) + # and return None + else: + return None + + +class LogDPAlgorithms(AbstractDPAlgorithms): + """Implement forward and backward algorithms using a log approach. + + This uses the approach of calculating the sum of log probabilities + using a lookup table for common values. + + XXX This is not implemented yet! + """ + + def __init__(self, markov_model, sequence): + """Initialize the class.""" + raise NotImplementedError("Haven't coded this yet...") diff --git a/code/lib/Bio/HMM/MarkovModel.py b/code/lib/Bio/HMM/MarkovModel.py new file mode 100644 index 0000000..ef9fef6 --- /dev/null +++ b/code/lib/Bio/HMM/MarkovModel.py @@ -0,0 +1,677 @@ +# Copyright 2001 Brad Chapman. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. + +"""Deal with representations of Markov Models.""" +# standard modules +import copy +import math +import random +from collections import defaultdict + +from Bio.Seq import Seq + + +def _gen_random_array(n): + """Return an array of n random numbers summing to 1.0 (PRIVATE).""" + randArray = [random.random() for _ in range(n)] + total = sum(randArray) + return [x / total for x in randArray] + + +def _calculate_emissions(emission_probs): + """Calculate which symbols can be emitted in each state (PRIVATE).""" + # loop over all of the state-symbol duples, mapping states to + # lists of emitted symbols + emissions = defaultdict(list) + for state, symbol in emission_probs: + emissions[state].append(symbol) + + return emissions + + +def _calculate_from_transitions(trans_probs): + """Calculate which 'from transitions' are allowed for each state (PRIVATE). + + This looks through all of the trans_probs, and uses this dictionary + to determine allowed transitions. It converts this information into + a dictionary, whose keys are source states and whose values are + lists of destination states reachable from the source state via a + transition. + """ + transitions = defaultdict(list) + for from_state, to_state in trans_probs: + transitions[from_state].append(to_state) + + return transitions + + +def _calculate_to_transitions(trans_probs): + """Calculate which 'to transitions' are allowed for each state (PRIVATE). + + This looks through all of the trans_probs, and uses this dictionary + to determine allowed transitions. 
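The grouping performed by `_calculate_from_transitions` and `_calculate_to_transitions` is a single pass over the `(from, to)` keys; a sketch with a toy transition table:

```python
# Sketch: deriving allowed from-/to-transition maps from trans_probs keys.
from collections import defaultdict

trans_probs = {("A", "B"): 0.3, ("A", "A"): 0.7, ("B", "A"): 1.0}

from_transitions = defaultdict(list)  # source -> reachable destinations
to_transitions = defaultdict(list)    # destination -> possible sources
for from_state, to_state in trans_probs:
    from_transitions[from_state].append(to_state)
    to_transitions[to_state].append(from_state)

print(dict(from_transitions))  # {'A': ['B', 'A'], 'B': ['A']}
print(dict(to_transitions))    # {'B': ['A'], 'A': ['A', 'B']}
```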
It converts this information into + a dictionary, whose keys are destination states and whose values are + lists of source states from which the destination is reachable via a + transition. + """ + transitions = defaultdict(list) + for from_state, to_state in trans_probs: + transitions[to_state].append(from_state) + + return transitions + + +class MarkovModelBuilder: + """Interface to build up a Markov Model. + + This class is designed to try to separate the task of specifying the + Markov Model from the actual model itself. This is in hopes of making + the actual Markov Model classes smaller. + + So, this builder class should be used to create Markov models instead + of trying to initiate a Markov Model directly. + """ + + # the default pseudo counts to use + DEFAULT_PSEUDO = 1 + + def __init__(self, state_alphabet, emission_alphabet): + """Initialize a builder to create Markov Models. + + Arguments: + - state_alphabet -- An iterable (e.g., tuple or list) containing + all of the letters that can appear in the states + - emission_alphabet -- An iterable (e.g., tuple or list) containing + all of the letters for states that can be emitted by the HMM. + + """ + self._state_alphabet = tuple(state_alphabet) + self._emission_alphabet = tuple(emission_alphabet) + + # probabilities for the initial state, initialized by calling + # set_initial_probabilities (required) + self.initial_prob = {} + + # the probabilities for transitions and emissions + # by default we have no transitions and all possible emissions + self.transition_prob = {} + self.emission_prob = self._all_blank(state_alphabet, emission_alphabet) + + # the default pseudocounts for transition and emission counting + self.transition_pseudo = {} + self.emission_pseudo = self._all_pseudo(state_alphabet, emission_alphabet) + + def _all_blank(self, first_alphabet, second_alphabet): + """Return a dictionary with all counts set to zero (PRIVATE). + + This uses the letters in the first and second alphabet to create + a dictionary with keys of two tuples organized as + (letter of first alphabet, letter of second alphabet). The values + are all set to 0. + """ + all_blank = {} + for first_state in first_alphabet: + for second_state in second_alphabet: + all_blank[(first_state, second_state)] = 0 + + return all_blank + + def _all_pseudo(self, first_alphabet, second_alphabet): + """Return a dictionary with all counts set to a default value (PRIVATE). + + This takes the letters in first alphabet and second alphabet and + creates a dictionary with keys of two tuples organized as: + (letter of first alphabet, letter of second alphabet). The values + are all set to the value of the class attribute DEFAULT_PSEUDO. + """ + all_counts = {} + for first_state in first_alphabet: + for second_state in second_alphabet: + all_counts[(first_state, second_state)] = self.DEFAULT_PSEUDO + + return all_counts + + def get_markov_model(self): + """Return the markov model corresponding with the current parameters. + + Each markov model returned by a call to this function is unique + (ie. they don't influence each other). 
+ """ + # user must set initial probabilities + if not self.initial_prob: + raise Exception( + "set_initial_probabilities must be called to " + "fully initialize the Markov model" + ) + + initial_prob = copy.deepcopy(self.initial_prob) + transition_prob = copy.deepcopy(self.transition_prob) + emission_prob = copy.deepcopy(self.emission_prob) + transition_pseudo = copy.deepcopy(self.transition_pseudo) + emission_pseudo = copy.deepcopy(self.emission_pseudo) + + return HiddenMarkovModel( + self._state_alphabet, + self._emission_alphabet, + initial_prob, + transition_prob, + emission_prob, + transition_pseudo, + emission_pseudo, + ) + + def set_initial_probabilities(self, initial_prob): + """Set initial state probabilities. + + initial_prob is a dictionary mapping states to probabilities. + Suppose, for example, that the state alphabet is ('A', 'B'). Call + set_initial_prob({'A': 1}) to guarantee that the initial + state will be 'A'. Call set_initial_prob({'A': 0.5, 'B': 0.5}) + to make each initial state equally probable. + + This method must now be called in order to use the Markov model + because the calculation of initial probabilities has changed + incompatibly; the previous calculation was incorrect. + + If initial probabilities are set for all states, then they should add up + to 1. Otherwise the sum should be <= 1. The residual probability is + divided up evenly between all the states for which the initial + probability has not been set. For example, calling + set_initial_prob({}) results in P('A') = 0.5 and P('B') = 0.5, + for the above example. + """ + self.initial_prob = copy.copy(initial_prob) + + # ensure that all referenced states are valid + for state in initial_prob: + if state not in self._state_alphabet: + raise ValueError( + "State %s was not found in the sequence alphabet" % state + ) + + # distribute the residual probability, if any + num_states_not_set = len(self._state_alphabet) - len(self.initial_prob) + if num_states_not_set < 0: + raise Exception("Initial probabilities can't exceed # of states") + prob_sum = sum(self.initial_prob.values()) + if prob_sum > 1.0: + raise Exception("Total initial probability cannot exceed 1.0") + if num_states_not_set > 0: + prob = (1.0 - prob_sum) / num_states_not_set + for state in self._state_alphabet: + if state not in self.initial_prob: + self.initial_prob[state] = prob + + def set_equal_probabilities(self): + """Reset all probabilities to be an average value. + + Resets the values of all initial probabilities and all allowed + transitions and all allowed emissions to be equal to 1 divided by the + number of possible elements. + + This is useful if you just want to initialize a Markov Model to + starting values (ie. if you have no prior notions of what the + probabilities should be -- or if you are just feeling too lazy + to calculate them :-). + + Warning 1 -- this will reset all currently set probabilities. + + Warning 2 -- This just sets all probabilities for transitions and + emissions to total up to 1, so it doesn't ensure that the sum of + each set of transitions adds up to 1. 
+        """
+        # set initial state probabilities
+        # (1 / the number of states, so the initial distribution sums to 1)
+        new_initial_prob = float(1) / float(len(self._state_alphabet))
+        for state in self._state_alphabet:
+            self.initial_prob[state] = new_initial_prob
+
+        # set the transitions
+        new_trans_prob = float(1) / float(len(self.transition_prob))
+        for key in self.transition_prob:
+            self.transition_prob[key] = new_trans_prob
+
+        # set the emissions
+        new_emission_prob = float(1) / float(len(self.emission_prob))
+        for key in self.emission_prob:
+            self.emission_prob[key] = new_emission_prob
+
+    def set_random_initial_probabilities(self):
+        """Set all initial state probabilities to a randomly generated distribution.
+
+        Returns the dictionary containing the initial probabilities.
+        """
+        initial_freqs = _gen_random_array(len(self._state_alphabet))
+        for state in self._state_alphabet:
+            self.initial_prob[state] = initial_freqs.pop()
+
+        return self.initial_prob
+
+    def set_random_transition_probabilities(self):
+        """Set all allowed transition probabilities to a randomly generated distribution.
+
+        Returns the dictionary containing the transition probabilities.
+        """
+        if not self.transition_prob:
+            raise Exception(
+                "No transitions have been allowed yet. "
+                "Allow some or all transitions by calling "
+                "allow_transition or allow_all_transitions first."
+            )
+
+        transitions_from = _calculate_from_transitions(self.transition_prob)
+        for from_state in transitions_from:
+            freqs = _gen_random_array(len(transitions_from[from_state]))
+            for to_state in transitions_from[from_state]:
+                self.transition_prob[(from_state, to_state)] = freqs.pop()
+
+        return self.transition_prob
+
+    def set_random_emission_probabilities(self):
+        """Set all allowed emission probabilities to a randomly generated distribution.
+
+        Returns the dictionary containing the emission probabilities.
+        """
+        if not self.emission_prob:
+            raise Exception(
+                "No emissions have been allowed yet. Allow some or all emissions."
+            )
+
+        emissions = _calculate_emissions(self.emission_prob)
+        for state in emissions:
+            freqs = _gen_random_array(len(emissions[state]))
+            for symbol in emissions[state]:
+                self.emission_prob[(state, symbol)] = freqs.pop()
+
+        return self.emission_prob
+
+    def set_random_probabilities(self):
+        """Set all probabilities to randomly generated numbers.
+
+        Resets probabilities of all initial states, transitions, and
+        emissions to random values.
+        """
+        self.set_random_initial_probabilities()
+        self.set_random_transition_probabilities()
+        self.set_random_emission_probabilities()
+
+    # --- functions to deal with the transitions in the sequence
+
+    def allow_all_transitions(self):
+        """Create transitions between all states.
+
+        By default all transitions within the alphabet are disallowed;
+        this is a convenience function to change this to allow all
+        possible transitions.
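Warning 2 on `set_equal_probabilities` above is easy to trip over, so here is a small demonstration of what it means, using a hypothetical two-state chain:

```python
builder = MarkovModelBuilder(("A", "B"), ("x",))
builder.set_initial_probabilities({})
builder.allow_all_transitions()    # 2 states -> 4 allowed transitions
builder.set_equal_probabilities()
# Each transition gets 1/4, so the outgoing row for "A" sums to 0.5, not 1:
assert builder.transition_prob[("A", "B")] == 0.25
```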
+ """ + # first get all probabilities and pseudo counts set + # to the default values + all_probs = self._all_blank(self._state_alphabet, self._state_alphabet) + + all_pseudo = self._all_pseudo(self._state_alphabet, self._state_alphabet) + + # now set any probabilities and pseudo counts that + # were previously set + for set_key in self.transition_prob: + all_probs[set_key] = self.transition_prob[set_key] + + for set_key in self.transition_pseudo: + all_pseudo[set_key] = self.transition_pseudo[set_key] + + # finally reinitialize the transition probs and pseudo counts + self.transition_prob = all_probs + self.transition_pseudo = all_pseudo + + def allow_transition( + self, from_state, to_state, probability=None, pseudocount=None + ): + """Set a transition as being possible between the two states. + + probability and pseudocount are optional arguments + specifying the probabilities and pseudo counts for the transition. + If these are not supplied, then the values are set to the + default values. + + Raises: + KeyError -- if the two states already have an allowed transition. + + """ + # check the sanity of adding these states + for state in [from_state, to_state]: + if state not in self._state_alphabet: + raise ValueError( + "State %s was not found in the sequence alphabet" % state + ) + + # ensure that the states are not already set + if (from_state, to_state) not in self.transition_prob and ( + from_state, + to_state, + ) not in self.transition_pseudo: + # set the initial probability + if probability is None: + probability = 0 + self.transition_prob[(from_state, to_state)] = probability + + # set the initial pseudocounts + if pseudocount is None: + pseudocount = self.DEFAULT_PSEUDO + self.transition_pseudo[(from_state, to_state)] = pseudocount + else: + raise KeyError( + "Transition from %s to %s is already allowed." % (from_state, to_state) + ) + + def destroy_transition(self, from_state, to_state): + """Restrict transitions between the two states. + + Raises: + KeyError if the transition is not currently allowed. + + """ + try: + del self.transition_prob[(from_state, to_state)] + del self.transition_pseudo[(from_state, to_state)] + except KeyError: + raise KeyError( + "Transition from %s to %s is already disallowed." + % (from_state, to_state) + ) + + def set_transition_score(self, from_state, to_state, probability): + """Set the probability of a transition between two states. + + Raises: + KeyError if the transition is not allowed. + + """ + if (from_state, to_state) in self.transition_prob: + self.transition_prob[(from_state, to_state)] = probability + else: + raise KeyError( + "Transition from %s to %s is not allowed." % (from_state, to_state) + ) + + def set_transition_pseudocount(self, from_state, to_state, count): + """Set the default pseudocount for a transition. + + To avoid computational problems, it is helpful to be able to + set a 'default' pseudocount to start with for estimating + transition and emission probabilities (see p62 in Durbin et al + for more discussion on this. By default, all transitions have + a pseudocount of 1. + + Raises: + KeyError if the transition is not allowed. + + """ + if (from_state, to_state) in self.transition_pseudo: + self.transition_pseudo[(from_state, to_state)] = count + else: + raise KeyError( + "Transition from %s to %s is not allowed." 
% (from_state, to_state) + ) + + # --- functions to deal with emissions from the sequence + + def set_emission_score(self, seq_state, emission_state, probability): + """Set the probability of a emission from a particular state. + + Raises: + KeyError if the emission from the given state is not allowed. + + """ + if (seq_state, emission_state) in self.emission_prob: + self.emission_prob[(seq_state, emission_state)] = probability + else: + raise KeyError( + "Emission of %s from %s is not allowed." % (emission_state, seq_state) + ) + + def set_emission_pseudocount(self, seq_state, emission_state, count): + """Set the default pseudocount for an emission. + + To avoid computational problems, it is helpful to be able to + set a 'default' pseudocount to start with for estimating + transition and emission probabilities (see p62 in Durbin et al + for more discussion on this. By default, all emissions have + a pseudocount of 1. + + Raises: + KeyError if the emission from the given state is not allowed. + + """ + if (seq_state, emission_state) in self.emission_pseudo: + self.emission_pseudo[(seq_state, emission_state)] = count + else: + raise KeyError( + "Emission of %s from %s is not allowed." % (emission_state, seq_state) + ) + + +class HiddenMarkovModel: + """Represent a hidden markov model that can be used for state estimation.""" + + def __init__( + self, + state_alphabet, + emission_alphabet, + initial_prob, + transition_prob, + emission_prob, + transition_pseudo, + emission_pseudo, + ): + """Initialize a Markov Model. + + Note: You should use the MarkovModelBuilder class instead of + initiating this class directly. + + Arguments: + - state_alphabet -- A tuple containing all of the letters that can + appear in the states. + - emission_alphabet -- A tuple containing all of the letters for + states that can be emitted by the HMM. + - initial_prob - A dictionary of initial probabilities for all states. + - transition_prob -- A dictionary of transition probabilities for all + possible transitions in the sequence. + - emission_prob -- A dictionary of emission probabilities for all + possible emissions from the sequence states. + - transition_pseudo -- Pseudo-counts to be used for the transitions, + when counting for purposes of estimating transition probabilities. + - emission_pseudo -- Pseudo-counts to be used for the emissions, + when counting for purposes of estimating emission probabilities. + + """ + self.state_alphabet = state_alphabet + self.emission_alphabet = emission_alphabet + + self.initial_prob = initial_prob + + self._transition_pseudo = transition_pseudo + self._emission_pseudo = emission_pseudo + + self.transition_prob = transition_prob + self.emission_prob = emission_prob + + # a dictionary of the possible transitions from each state + # each key is a source state, mapped to a list of the destination states + # that are reachable from the source state via a transition + self._transitions_from = _calculate_from_transitions(self.transition_prob) + + # a dictionary of the possible transitions to each state + # each key is a destination state, mapped to a list of source states + # from which the destination is reachable via a transition + self._transitions_to = _calculate_to_transitions(self.transition_prob) + + def get_blank_transitions(self): + """Get the default transitions for the model. + + Returns a dictionary of all of the default transitions between any + two letters in the sequence alphabet. 
The dictionary is structured
+        with keys as (letter1, letter2) and values as the starting number
+        of transitions.
+        """
+        return self._transition_pseudo
+
+    def get_blank_emissions(self):
+        """Get the starting default emissions for each sequence.
+
+        This returns a dictionary of the default emissions for each
+        letter. The dictionary is structured with keys as
+        (seq_letter, emission_letter) and values as the starting number
+        of emissions.
+        """
+        return self._emission_pseudo
+
+    def transitions_from(self, state_letter):
+        """Get all destination states which can transition from source state_letter.
+
+        This returns all letters which the given state_letter can transition
+        to, i.e. all the destination states reachable from state_letter.
+
+        An empty list is returned if state_letter has no outgoing transitions.
+        """
+        if state_letter in self._transitions_from:
+            return self._transitions_from[state_letter]
+        else:
+            return []
+
+    def transitions_to(self, state_letter):
+        """Get all source states which can transition to destination state_letter.
+
+        This returns all letters which the given state_letter is reachable
+        from, i.e. all the source states which can reach state_letter.
+
+        An empty list is returned if state_letter is unreachable.
+        """
+        if state_letter in self._transitions_to:
+            return self._transitions_to[state_letter]
+        else:
+            return []
+
+    def viterbi(self, sequence, state_alphabet):
+        """Calculate the most probable state path using the Viterbi algorithm.
+
+        This implements the Viterbi algorithm (see pgs 55-57 in Durbin et
+        al for a full explanation -- this is where I took my implementation
+        ideas from), to allow decoding of the state path, given a sequence
+        of emissions.
+
+        Arguments:
+         - sequence -- A Seq object with the emission sequence that we
+           want to decode.
+         - state_alphabet -- An iterable (e.g., tuple or list) containing
+           all of the letters that can appear in the states
+
+        """
+        # calculate logarithms of the initial, transition, and emission probs
+        log_initial = self._log_transform(self.initial_prob)
+        log_trans = self._log_transform(self.transition_prob)
+        log_emission = self._log_transform(self.emission_prob)
+
+        viterbi_probs = {}
+        pred_state_seq = {}
+
+        # --- recursion
+        # loop over the training sequence (i = 1 .. L)
+        # NOTE: My index numbers are one less than what is given in Durbin
+        # et al, since we are indexing the sequence going from 0 to
+        # (Length - 1) not 1 to Length, like in Durbin et al.
+ for i in range(0, len(sequence)): + # loop over all of the possible i-th states in the state path + for cur_state in state_alphabet: + # e_{l}(x_{i}) + emission_part = log_emission[(cur_state, sequence[i])] + + max_prob = 0 + if i == 0: + # for the first state, use the initial probability rather + # than looking back to previous states + max_prob = log_initial[cur_state] + else: + # loop over all possible (i-1)-th previous states + possible_state_probs = {} + for prev_state in self.transitions_to(cur_state): + # a_{kl} + trans_part = log_trans[(prev_state, cur_state)] + + # v_{k}(i - 1) + viterbi_part = viterbi_probs[(prev_state, i - 1)] + cur_prob = viterbi_part + trans_part + + possible_state_probs[prev_state] = cur_prob + + # calculate the viterbi probability using the max + max_prob = max(possible_state_probs.values()) + + # v_{k}(i) + viterbi_probs[(cur_state, i)] = emission_part + max_prob + + if i > 0: + # get the most likely prev_state leading to cur_state + for state in possible_state_probs: + if possible_state_probs[state] == max_prob: + pred_state_seq[(i - 1, cur_state)] = state + break + + # --- termination + # calculate the probability of the state path + # loop over all states + all_probs = {} + for state in state_alphabet: + # v_{k}(L) + all_probs[state] = viterbi_probs[(state, len(sequence) - 1)] + + state_path_prob = max(all_probs.values()) + + # find the last pointer we need to trace back from + last_state = "" + for state in all_probs: + if all_probs[state] == state_path_prob: + last_state = state + + assert last_state != "", "Didn't find the last state to trace from!" + + # --- traceback + traceback_seq = [] + + loop_seq = list(range(1, len(sequence))) + loop_seq.reverse() + + # last_state is the last state in the most probable state sequence. + # Compute that sequence by walking backwards in time. From the i-th + # state in the sequence, find the (i-1)-th state as the most + # probable state preceding the i-th state. + state = last_state + traceback_seq.append(state) + for i in loop_seq: + state = pred_state_seq[(i - 1, state)] + traceback_seq.append(state) + + # put the traceback sequence in the proper orientation + traceback_seq.reverse() + traceback_seq = "".join(traceback_seq) + + return Seq(traceback_seq), state_path_prob + + def _log_transform(self, probability): + """Return log transform of the given probability dictionary (PRIVATE). + + When calculating the Viterbi equation, add logs of probabilities rather + than multiplying probabilities, to avoid underflow errors. This method + returns a new dictionary with the same keys as the given dictionary + and log-transformed values. + """ + log_prob = copy.copy(probability) + for key in log_prob: + prob = log_prob[key] + if prob > 0: + log_prob[key] = math.log(log_prob[key]) + else: + log_prob[key] = -math.inf + + return log_prob diff --git a/code/lib/Bio/HMM/Trainer.py b/code/lib/Bio/HMM/Trainer.py new file mode 100644 index 0000000..98e3703 --- /dev/null +++ b/code/lib/Bio/HMM/Trainer.py @@ -0,0 +1,430 @@ +# Copyright 2001 Brad Chapman. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. + +"""Provide trainers which estimate parameters based on training sequences. + +These should be used to 'train' a Markov Model prior to actually using +it to decode state paths. 
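Before moving on to the trainers, here is an end-to-end sketch of the decoding workflow the classes above support. The fair/biased coin model and all scores are illustrative, not taken from this codebase:

```python
from Bio.Seq import Seq

builder = MarkovModelBuilder(("F", "B"), ("H", "T"))   # fair vs. biased coin
builder.set_initial_probabilities({"F": 0.8, "B": 0.2})
builder.allow_all_transitions()
for pair, p in {("F", "F"): 0.9, ("F", "B"): 0.1,
                ("B", "B"): 0.9, ("B", "F"): 0.1}.items():
    builder.set_transition_score(*pair, p)
for pair, p in {("F", "H"): 0.5, ("F", "T"): 0.5,
                ("B", "H"): 0.9, ("B", "T"): 0.1}.items():
    builder.set_emission_score(*pair, p)

model = builder.get_markov_model()
path, log_prob = model.viterbi(Seq("HTHHHHHHHH"), ("F", "B"))
print(path, log_prob)   # most probable state path and its log probability
```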
When supplied training sequences and a model +to work from, these classes will estimate parameters of the model. + +This aims to estimate two parameters: + +- a_{kl} -- the number of times there is a transition from k to l in the + training data. +- e_{k}(b) -- the number of emissions of the state b from the letter k + in the training data. + +""" +# standard modules +import math + +# local stuff +from .DynamicProgramming import ScaledDPAlgorithms + + +class TrainingSequence: + """Hold a training sequence with emissions and optionally, a state path.""" + + def __init__(self, emissions, state_path): + """Initialize a training sequence. + + Arguments: + - emissions - An iterable (e.g., a tuple, list, or Seq object) + containing the sequence of emissions in the training sequence. + - state_path - An iterable (e.g., a tuple or list) containing the + sequence of states. If there is no known state path, then the + sequence of states should be an empty iterable. + + """ + if len(state_path) > 0 and len(emissions) != len(state_path): + raise ValueError("State path does not match associated emissions.") + self.emissions = emissions + self.states = state_path + + +class AbstractTrainer: + """Provide generic functionality needed in all trainers.""" + + def __init__(self, markov_model): + """Initialize the class.""" + self._markov_model = markov_model + + def log_likelihood(self, probabilities): + """Calculate the log likelihood of the training seqs. + + Arguments: + - probabilities -- A list of the probabilities of each training + sequence under the current parameters, calculated using the + forward algorithm. + + """ + total_likelihood = 0 + for probability in probabilities: + total_likelihood += math.log(probability) + + return total_likelihood + + def estimate_params(self, transition_counts, emission_counts): + """Get a maximum likelihood estimation of transition and emmission. + + Arguments: + - transition_counts -- A dictionary with the total number of counts + of transitions between two states. + - emissions_counts -- A dictionary with the total number of counts + of emmissions of a particular emission letter by a state letter. + + This then returns the maximum likelihood estimators for the + transitions and emissions, estimated by formulas 3.18 in + Durbin et al:: + + a_{kl} = A_{kl} / sum(A_{kl'}) + e_{k}(b) = E_{k}(b) / sum(E_{k}(b')) + + Returns: + Transition and emission dictionaries containing the maximum + likelihood estimators. + + """ + # now calculate the information + ml_transitions = self.ml_estimator(transition_counts) + ml_emissions = self.ml_estimator(emission_counts) + + return ml_transitions, ml_emissions + + def ml_estimator(self, counts): + """Calculate the maximum likelihood estimator. + + This can calculate maximum likelihoods for both transitions + and emissions. + + Arguments: + - counts -- A dictionary of the counts for each item. + + See estimate_params for a description of the formula used for + calculation. + + """ + # get an ordered list of all items + all_ordered = sorted(counts) + + ml_estimation = {} + + # the total counts for the current letter we are on + cur_letter = None + cur_letter_counts = 0 + + for cur_item in all_ordered: + # if we are on a new letter (ie. 
the first letter of the tuple) + if cur_item[0] != cur_letter: + # set the new letter we are working with + cur_letter = cur_item[0] + + # count up the total counts for this letter + cur_letter_counts = counts[cur_item] + + # add counts for all other items with the same first letter + cur_position = all_ordered.index(cur_item) + 1 + + # keep adding while we have the same first letter or until + # we get to the end of the ordered list + while ( + cur_position < len(all_ordered) + and all_ordered[cur_position][0] == cur_item[0] + ): + cur_letter_counts += counts[all_ordered[cur_position]] + cur_position += 1 + # otherwise we've already got the total counts for this letter + else: + pass + + # now calculate the ml and add it to the estimation + cur_ml = float(counts[cur_item]) / float(cur_letter_counts) + ml_estimation[cur_item] = cur_ml + + return ml_estimation + + +class BaumWelchTrainer(AbstractTrainer): + """Trainer that uses the Baum-Welch algorithm to estimate parameters. + + These should be used when a training sequence for an HMM has unknown + paths for the actual states, and you need to make an estimation of the + model parameters from the observed emissions. + + This uses the Baum-Welch algorithm, first described in + Baum, L.E. 1972. Inequalities. 3:1-8 + This is based on the description in 'Biological Sequence Analysis' by + Durbin et al. in section 3.3 + + This algorithm is guaranteed to converge to a local maximum, but not + necessarily to the global maxima, so use with care! + """ + + def __init__(self, markov_model): + """Initialize the trainer. + + Arguments: + - markov_model - The model we are going to estimate parameters for. + This should have the parameters with some initial estimates, that + we can build from. + + """ + AbstractTrainer.__init__(self, markov_model) + + def train(self, training_seqs, stopping_criteria, dp_method=ScaledDPAlgorithms): + """Estimate the parameters using training sequences. + + The algorithm for this is taken from Durbin et al. p64, so this + is a good place to go for a reference on what is going on. + + Arguments: + - training_seqs -- A list of TrainingSequence objects to be used + for estimating the parameters. + - stopping_criteria -- A function, that when passed the change + in log likelihood and threshold, will indicate if we should stop + the estimation iterations. + - dp_method -- A class instance specifying the dynamic programming + implementation we should use to calculate the forward and + backward variables. By default, we use the scaling method. 
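The `stopping_criteria` argument of `train` is simply a callable that receives the change in log likelihood and the iteration count. A plausible sketch (the threshold and iteration cap are illustrative):

```python
def stop_training(log_likelihood_change, num_iterations,
                  threshold=0.01, max_iterations=100):
    """Stop when the gain is below threshold or enough rounds have run."""
    return log_likelihood_change < threshold or num_iterations >= max_iterations

# trainer = BaumWelchTrainer(model)
# trained = trainer.train(training_seqs, stop_training)
```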
+ + """ + prev_log_likelihood = None + num_iterations = 1 + + while True: + transition_count = self._markov_model.get_blank_transitions() + emission_count = self._markov_model.get_blank_emissions() + + # remember all of the sequence probabilities + all_probabilities = [] + + for training_seq in training_seqs: + # calculate the forward and backward variables + DP = dp_method(self._markov_model, training_seq) + forward_var, seq_prob = DP.forward_algorithm() + backward_var = DP.backward_algorithm() + + all_probabilities.append(seq_prob) + + # update the counts for transitions and emissions + transition_count = self.update_transitions( + transition_count, training_seq, forward_var, backward_var, seq_prob + ) + emission_count = self.update_emissions( + emission_count, training_seq, forward_var, backward_var, seq_prob + ) + + # update the markov model with the new probabilities + ml_transitions, ml_emissions = self.estimate_params( + transition_count, emission_count + ) + self._markov_model.transition_prob = ml_transitions + self._markov_model.emission_prob = ml_emissions + + cur_log_likelihood = self.log_likelihood(all_probabilities) + + # if we have previously calculated the log likelihood (ie. + # not the first round), see if we can finish + if prev_log_likelihood is not None: + # XXX log likelihoods are negatives -- am I calculating + # the change properly, or should I use the negatives... + # I'm not sure at all if this is right. + log_likelihood_change = abs( + abs(cur_log_likelihood) - abs(prev_log_likelihood) + ) + + # check whether we have completed enough iterations to have + # a good estimation + if stopping_criteria(log_likelihood_change, num_iterations): + break + + # set up for another round of iterations + prev_log_likelihood = cur_log_likelihood + num_iterations += 1 + + return self._markov_model + + def update_transitions( + self, + transition_counts, + training_seq, + forward_vars, + backward_vars, + training_seq_prob, + ): + """Add the contribution of a new training sequence to the transitions. + + Arguments: + - transition_counts -- A dictionary of the current counts for the + transitions + - training_seq -- The training sequence we are working with + - forward_vars -- Probabilities calculated using the forward + algorithm. + - backward_vars -- Probabilities calculated using the backwards + algorithm. + - training_seq_prob - The probability of the current sequence. + + This calculates A_{kl} (the estimated transition counts from state + k to state l) using formula 3.20 in Durbin et al. 
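For reference, the estimated count this method accumulates is eq. 3.20 of Durbin et al., in the same notation as the comments below:

```latex
A_{kl} \mathrel{+}= \frac{1}{P(x)} \sum_{i} f_k(i)\, a_{kl}\, e_l(x_{i+1})\, b_l(i+1)
```

Here `f` and `b` are the forward and backward variables and `P(x)` is the sequence probability returned by the forward algorithm.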
+ + """ + # set up the transition and emission probabilities we are using + transitions = self._markov_model.transition_prob + emissions = self._markov_model.emission_prob + + # loop over the possible combinations of state path letters + for k in self._markov_model.state_alphabet: + for l in self._markov_model.transitions_from(k): + estimated_counts = 0 + # now loop over the entire training sequence + for i in range(len(training_seq.emissions) - 1): + # the forward value of k at the current position + forward_value = forward_vars[(k, i)] + + # the backward value of l in the next position + backward_value = backward_vars[(l, i + 1)] + + # the probability of a transition from k to l + trans_value = transitions[(k, l)] + + # the probability of getting the emission at the next pos + emm_value = emissions[(l, training_seq.emissions[i + 1])] + + estimated_counts += ( + forward_value * trans_value * emm_value * backward_value + ) + + # update the transition approximation + transition_counts[(k, l)] += float(estimated_counts) / training_seq_prob + + return transition_counts + + def update_emissions( + self, + emission_counts, + training_seq, + forward_vars, + backward_vars, + training_seq_prob, + ): + """Add the contribution of a new training sequence to the emissions. + + Arguments: + - emission_counts -- A dictionary of the current counts for the + emissions + - training_seq -- The training sequence we are working with + - forward_vars -- Probabilities calculated using the forward + algorithm. + - backward_vars -- Probabilities calculated using the backwards + algorithm. + - training_seq_prob - The probability of the current sequence. + + This calculates E_{k}(b) (the estimated emission probability for + emission letter b from state k) using formula 3.21 in Durbin et al. + + """ + # loop over the possible combinations of state path letters + for k in self._markov_model.state_alphabet: + # now loop over all of the possible emissions + for b in self._markov_model.emission_alphabet: + expected_times = 0 + # finally loop over the entire training sequence + for i in range(len(training_seq.emissions)): + # only count the forward and backward probability if the + # emission at the position is the same as b + if training_seq.emissions[i] == b: + # f_{k}(i) b_{k}(i) + expected_times += forward_vars[(k, i)] * backward_vars[(k, i)] + + # add to E_{k}(b) + emission_counts[(k, b)] += float(expected_times) / training_seq_prob + + return emission_counts + + +class KnownStateTrainer(AbstractTrainer): + """Estimate probabilities with known state sequences. + + This should be used for direct estimation of emission and transition + probabilities when both the state path and emission sequence are + known for the training examples. + """ + + def __init__(self, markov_model): + """Initialize the class.""" + AbstractTrainer.__init__(self, markov_model) + + def train(self, training_seqs): + """Estimate the Markov Model parameters with known state paths. + + This trainer requires that both the state and the emissions are + known for all of the training sequences in the list of + TrainingSequence objects. + This training will then count all of the transitions and emissions, + and use this to estimate the parameters of the model. 
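A short sketch of this supervised training path, assuming a model built as in the earlier builder example whose allowed transitions cover those observed (the sequences are illustrative):

```python
from Bio.Seq import Seq

# Emissions and their known state path must have equal length.
seqs = [
    TrainingSequence(Seq("HHTT"), Seq("FFBB")),
    TrainingSequence(Seq("HTHT"), Seq("FFFF")),
]
trainer = KnownStateTrainer(model)
trained_model = trainer.train(seqs)
```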
+ """ + # count up all of the transitions and emissions + transition_counts = self._markov_model.get_blank_transitions() + emission_counts = self._markov_model.get_blank_emissions() + + for training_seq in training_seqs: + emission_counts = self._count_emissions(training_seq, emission_counts) + transition_counts = self._count_transitions( + training_seq.states, transition_counts + ) + + # update the markov model from the counts + ml_transitions, ml_emissions = self.estimate_params( + transition_counts, emission_counts + ) + self._markov_model.transition_prob = ml_transitions + self._markov_model.emission_prob = ml_emissions + + return self._markov_model + + def _count_emissions(self, training_seq, emission_counts): + """Add emissions from the training sequence to the current counts (PRIVATE). + + Arguments: + - training_seq -- A TrainingSequence with states and emissions + to get the counts from + - emission_counts -- The current emission counts to add to. + + """ + for index in range(len(training_seq.emissions)): + cur_state = training_seq.states[index] + cur_emission = training_seq.emissions[index] + + try: + emission_counts[(cur_state, cur_emission)] += 1 + except KeyError: + raise KeyError( + "Unexpected emission (%s, %s)" % (cur_state, cur_emission) + ) + return emission_counts + + def _count_transitions(self, state_seq, transition_counts): + """Add transitions from the training sequence to the current counts (PRIVATE). + + Arguments: + - state_seq -- A Seq object with the states of the current training + sequence. + - transition_counts -- The current transition counts to add to. + + """ + for cur_pos in range(len(state_seq) - 1): + cur_state = state_seq[cur_pos] + next_state = state_seq[cur_pos + 1] + + try: + transition_counts[(cur_state, next_state)] += 1 + except KeyError: + raise KeyError( + "Unexpected transition (%s, %s)" % (cur_state, next_state) + ) + + return transition_counts diff --git a/code/lib/Bio/HMM/Utilities.py b/code/lib/Bio/HMM/Utilities.py new file mode 100644 index 0000000..61d3b37 --- /dev/null +++ b/code/lib/Bio/HMM/Utilities.py @@ -0,0 +1,68 @@ +# Copyright 2001 Brad Chapman. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. + +"""Generic functions which are useful for working with HMMs. + +This just collects general functions which you might like to use in +dealing with HMMs. +""" + + +def pretty_print_prediction( + emissions, + real_state, + predicted_state, + emission_title="Emissions", + real_title="Real State", + predicted_title="Predicted State", + line_width=75, +): + """Print out a state sequence prediction in a nice manner. + + Arguments: + - emissions -- The sequence of emissions of the sequence you are + dealing with. + - real_state -- The actual state path that generated the emissions. + - predicted_state -- A state path predicted by some kind of HMM model. 
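Plain strings work for all three sequences; a minimal illustrative call:

```python
pretty_print_prediction("HHTTTHH", "FFBBBFF", "FFBBFFF", line_width=40)
```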
+
+    """
+    # calculate the length of the titles and sequences
+    title_length = max(len(emission_title), len(real_title), len(predicted_title)) + 1
+    seq_length = line_width - title_length
+
+    # set up the titles so they'll print right
+    emission_title = emission_title.ljust(title_length)
+    real_title = real_title.ljust(title_length)
+    predicted_title = predicted_title.ljust(title_length)
+
+    cur_position = 0
+    # print the sequences seq_length characters at a time; the slices
+    # clamp automatically at the end of the sequences
+    while True:
+        print(
+            "%s%s"
+            % (emission_title, emissions[cur_position : cur_position + seq_length])
+        )
+        print(
+            "%s%s" % (real_title, real_state[cur_position : cur_position + seq_length])
+        )
+        print(
+            "%s%s\n"
+            % (
+                predicted_title,
+                predicted_state[cur_position : cur_position + seq_length],
+            )
+        )
+
+        if len(emissions) <= (cur_position + seq_length):
+            break
+
+        cur_position += seq_length
diff --git a/code/lib/Bio/HMM/__init__.py b/code/lib/Bio/HMM/__init__.py
new file mode 100644
index 0000000..a477108
--- /dev/null
+++ b/code/lib/Bio/HMM/__init__.py
@@ -0,0 +1,5 @@
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""A selection of Hidden Markov Model code."""
diff --git a/code/lib/Bio/HMM/__pycache__/DynamicProgramming.cpython-37.pyc b/code/lib/Bio/HMM/__pycache__/DynamicProgramming.cpython-37.pyc
new file mode 100644
index 0000000..b9f86c9
Binary files /dev/null and b/code/lib/Bio/HMM/__pycache__/DynamicProgramming.cpython-37.pyc differ
diff --git a/code/lib/Bio/HMM/__pycache__/MarkovModel.cpython-37.pyc b/code/lib/Bio/HMM/__pycache__/MarkovModel.cpython-37.pyc
new file mode 100644
index 0000000..3d527bd
Binary files /dev/null and b/code/lib/Bio/HMM/__pycache__/MarkovModel.cpython-37.pyc differ
diff --git a/code/lib/Bio/HMM/__pycache__/Trainer.cpython-37.pyc b/code/lib/Bio/HMM/__pycache__/Trainer.cpython-37.pyc
new file mode 100644
index 0000000..89eb7b5
Binary files /dev/null and b/code/lib/Bio/HMM/__pycache__/Trainer.cpython-37.pyc differ
diff --git a/code/lib/Bio/HMM/__pycache__/Utilities.cpython-37.pyc b/code/lib/Bio/HMM/__pycache__/Utilities.cpython-37.pyc
new file mode 100644
index 0000000..6a1bd24
Binary files /dev/null and b/code/lib/Bio/HMM/__pycache__/Utilities.cpython-37.pyc differ
diff --git a/code/lib/Bio/HMM/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/HMM/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000..50bf067
Binary files /dev/null and b/code/lib/Bio/HMM/__pycache__/__init__.cpython-37.pyc differ
diff --git a/code/lib/Bio/KEGG/Compound/__init__.py b/code/lib/Bio/KEGG/Compound/__init__.py
new file mode 100644
index 0000000..000291a
--- /dev/null
+++ b/code/lib/Bio/KEGG/Compound/__init__.py
@@ -0,0 +1,175 @@
+# Copyright 2001 by Tarjei Mikkelsen. All rights reserved.
+# Copyright 2007 by Michiel de Hoon. All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+
+"""Code to work with the KEGG Ligand/Compound database.
+
+Functions:
+ - parse - Returns an iterator giving Record objects.
+
+Classes:
+ - Record - A representation of a KEGG Ligand/Compound.
+"""
+
+
+from Bio.KEGG import _default_wrap, _struct_wrap, _wrap_kegg, _write_kegg
+
+
+# Set up line wrapping rules (see Bio.KEGG._wrap_kegg)
+name_wrap = [0, "", (" ", "$", 1, 1), ("-", "$", 1, 1)]
+id_wrap = _default_wrap
+struct_wrap = _struct_wrap
+
+
+class Record:
+    """Holds info from a KEGG Ligand/Compound record.
+
+    Attributes:
+     - entry       The entry identifier.
+     - name        A list of the compound names.
+     - formula     The chemical formula for the compound.
+     - mass        The molecular weight for the compound.
+     - pathway     A list of 3-tuples: ('PATH', pathway id, pathway)
+     - enzyme      A list of the EC numbers.
+     - structures  A list of 2-tuples: (database, list of struct ids)
+     - dblinks     A list of 2-tuples: (database, list of link ids)
+
+    """
+
+    def __init__(self):
+        """Initialize as new record."""
+        self.entry = ""
+        self.name = []
+        self.formula = ""
+        self.mass = ""
+        self.pathway = []
+        self.enzyme = []
+        self.structures = []
+        self.dblinks = []
+
+    def __str__(self):
+        """Return a string representation of this Record."""
+        return (
+            self._entry()
+            + self._name()
+            + self._formula()
+            + self._mass()
+            + self._pathway()
+            + self._enzyme()
+            + self._structures()
+            + self._dblinks()
+            + "///"
+        )
+
+    def _entry(self):
+        return _write_kegg("ENTRY", [self.entry])
+
+    def _name(self):
+        return _write_kegg(
+            "NAME", [_wrap_kegg(l, wrap_rule=name_wrap) for l in self.name]
+        )
+
+    def _formula(self):
+        return _write_kegg("FORMULA", [self.formula])
+
+    def _mass(self):
+        return _write_kegg("MASS", [self.mass])
+
+    def _pathway(self):
+        s = []
+        for entry in self.pathway:
+            s.append(entry[0] + " " + entry[1])
+        return _write_kegg("PATHWAY", [_wrap_kegg(l, wrap_rule=id_wrap(16)) for l in s])
+
+    def _enzyme(self):
+        return _write_kegg(
+            "ENZYME", [_wrap_kegg(l, wrap_rule=name_wrap) for l in self.enzyme]
+        )
+
+    def _structures(self):
+        s = []
+        for entry in self.structures:
+            s.append(entry[0] + ": " + " ".join(entry[1]) + " ")
+        return _write_kegg(
+            "STRUCTURES", [_wrap_kegg(l, wrap_rule=struct_wrap(5)) for l in s]
+        )
+
+    def _dblinks(self):
+        s = []
+        for entry in self.dblinks:
+            s.append(entry[0] + ": " + " ".join(entry[1]))
+        return _write_kegg("DBLINKS", [_wrap_kegg(l, wrap_rule=id_wrap(9)) for l in s])
+
+
+def parse(handle):
+    """Parse a KEGG Ligand/Compound file, returning Record objects.
+
+    This is an iterator function, typically used in a for loop. For
+    example, using one of the example KEGG files in the Biopython
+    test suite,
+
+    >>> with open("KEGG/compound.sample") as handle:
+    ...     for record in parse(handle):
+    ...         print("%s %s" % (record.entry, record.name[0]))
+    ...
+ C00023 Iron + C00017 Protein + C00099 beta-Alanine + C00294 Inosine + C00298 Trypsin + C00348 all-trans-Undecaprenyl phosphate + C00349 2-Methyl-3-oxopropanoate + C01386 NH2Mec + + """ + record = Record() + for line in handle: + if line[:3] == "///": + yield record + record = Record() + continue + if line[:12] != " ": + keyword = line[:12] + data = line[12:].strip() + if keyword == "ENTRY ": + words = data.split() + record.entry = words[0] + elif keyword == "NAME ": + data = data.strip(";") + record.name.append(data) + elif keyword == "ENZYME ": + while data: + column = data[:16] + data = data[16:] + enzyme = column.strip() + record.enzyme.append(enzyme) + elif keyword == "PATHWAY ": + map, name = data.split(" ") + pathway = ("PATH", map, name) + record.pathway.append(pathway) + elif keyword == "FORMULA ": + record.formula = data + elif keyword == "MASS ": + record.mass = data + elif keyword == "DBLINKS ": + if ":" in data: + key, values = data.split(":") + values = values.split() + row = (key, values) + record.dblinks.append(row) + else: + row = record.dblinks[-1] + key, values = row + values.extend(data.split()) + row = key, values + record.dblinks[-1] = row + + +if __name__ == "__main__": + from Bio._utils import run_doctest + + run_doctest() diff --git a/code/lib/Bio/KEGG/Compound/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/KEGG/Compound/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..7d9b138 Binary files /dev/null and b/code/lib/Bio/KEGG/Compound/__pycache__/__init__.cpython-37.pyc differ diff --git a/code/lib/Bio/KEGG/Enzyme/__init__.py b/code/lib/Bio/KEGG/Enzyme/__init__.py new file mode 100644 index 0000000..bb5bb7c --- /dev/null +++ b/code/lib/Bio/KEGG/Enzyme/__init__.py @@ -0,0 +1,328 @@ +# Copyright 2001 by Tarjei Mikkelsen. All rights reserved. +# Copyright 2007 by Michiel de Hoon. All rights reserved. +# This code is part of the Biopython distribution and governed by its +# license. Please see the LICENSE file that should have been included +# as part of this package. + +"""Code to work with the KEGG Enzyme database. + +Functions: + - parse - Returns an iterator giving Record objects. + +Classes: + - Record - Holds the information from a KEGG Enzyme record. +""" + + +from Bio.KEGG import _default_wrap, _struct_wrap, _wrap_kegg, _write_kegg + + +# Set up line wrapping rules (see Bio.KEGG._wrap_kegg) +rxn_wrap = [ + 0, + "", + (" + ", "", 1, 1), + (" = ", "", 1, 1), + (" ", "$", 1, 1), + ("-", "$", 1, 1), +] +name_wrap = [0, "", (" ", "$", 1, 1), ("-", "$", 1, 1)] +id_wrap = _default_wrap +struct_wrap = _struct_wrap + + +class Record: + """Holds info from a KEGG Enzyme record. + + Attributes: + - entry The EC number (withou the 'EC '). + - name A list of the enzyme names. + - classname A list of the classification terms. + - sysname The systematic name of the enzyme. + - reaction A list of the reaction description strings. + - substrate A list of the substrates. + - product A list of the products. + - inhibitor A list of the inhibitors. + - cofactor A list of the cofactors. + - effector A list of the effectors. + - comment A list of the comment strings. 
+ - pathway A list of 3-tuples: (database, id, pathway) + - genes A list of 2-tuples: (organism, list of gene ids) + - disease A list of 3-tuples: (database, id, disease) + - structures A list of 2-tuples: (database, list of struct ids) + - dblinks A list of 2-tuples: (database, list of db ids) + + """ + + def __init__(self): + """Initialize a new Record.""" + self.entry = "" + self.name = [] + self.classname = [] + self.sysname = [] + self.reaction = [] + self.substrate = [] + self.product = [] + self.inhibitor = [] + self.cofactor = [] + self.effector = [] + self.comment = [] + self.pathway = [] + self.genes = [] + self.disease = [] + self.structures = [] + self.dblinks = [] + + def __str__(self): + """Return a string representation of this Record.""" + return ( + self._entry() + + self._name() + + self._classname() + + self._sysname() + + self._reaction() + + self._substrate() + + self._product() + + self._inhibitor() + + self._cofactor() + + self._effector() + + self._comment() + + self._pathway() + + self._genes() + + self._disease() + + self._structures() + + self._dblinks() + + "///" + ) + + def _entry(self): + return _write_kegg("ENTRY", ["EC " + self.entry]) + + def _name(self): + return _write_kegg( + "NAME", [_wrap_kegg(l, wrap_rule=name_wrap) for l in self.name] + ) + + def _classname(self): + return _write_kegg("CLASS", self.classname) + + def _sysname(self): + return _write_kegg( + "SYSNAME", [_wrap_kegg(l, wrap_rule=name_wrap) for l in self.sysname] + ) + + def _reaction(self): + return _write_kegg( + "REACTION", [_wrap_kegg(l, wrap_rule=rxn_wrap) for l in self.reaction] + ) + + def _substrate(self): + return _write_kegg( + "SUBSTRATE", [_wrap_kegg(l, wrap_rule=name_wrap) for l in self.substrate] + ) + + def _product(self): + return _write_kegg( + "PRODUCT", [_wrap_kegg(l, wrap_rule=name_wrap) for l in self.product] + ) + + def _inhibitor(self): + return _write_kegg( + "INHIBITOR", [_wrap_kegg(l, wrap_rule=name_wrap) for l in self.inhibitor] + ) + + def _cofactor(self): + return _write_kegg( + "COFACTOR", [_wrap_kegg(l, wrap_rule=name_wrap) for l in self.cofactor] + ) + + def _effector(self): + return _write_kegg( + "EFFECTOR", [_wrap_kegg(l, wrap_rule=name_wrap) for l in self.effector] + ) + + def _comment(self): + return _write_kegg( + "COMMENT", [_wrap_kegg(l, wrap_rule=id_wrap(0)) for l in self.comment] + ) + + def _pathway(self): + s = [] + for entry in self.pathway: + s.append(entry[0] + ": " + entry[1] + " " + entry[2]) + return _write_kegg("PATHWAY", [_wrap_kegg(l, wrap_rule=id_wrap(16)) for l in s]) + + def _genes(self): + s = [] + for entry in self.genes: + s.append(entry[0] + ": " + " ".join(entry[1])) + return _write_kegg("GENES", [_wrap_kegg(l, wrap_rule=id_wrap(5)) for l in s]) + + def _disease(self): + s = [] + for entry in self.disease: + s.append(entry[0] + ": " + entry[1] + " " + entry[2]) + return _write_kegg("DISEASE", [_wrap_kegg(l, wrap_rule=id_wrap(13)) for l in s]) + + def _structures(self): + s = [] + for entry in self.structures: + s.append(entry[0] + ": " + " ".join(entry[1]) + " ") + return _write_kegg( + "STRUCTURES", [_wrap_kegg(l, wrap_rule=struct_wrap(5)) for l in s] + ) + + def _dblinks(self): + # This is a bit of a cheat that won't work if enzyme entries + # have more than one link id per db id. For now, that's not + # the case - storing links ids in a list is only to make + # this class similar to the Compound.Record class. 
+ s = [] + for entry in self.dblinks: + s.append(entry[0] + ": " + " ".join(entry[1])) + return _write_kegg("DBLINKS", s) + + +def parse(handle): + """Parse a KEGG Enzyme file, returning Record objects. + + This is an iterator function, typically used in a for loop. For + example, using one of the example KEGG files in the Biopython + test suite, + + >>> with open("KEGG/enzyme.sample") as handle: + ... for record in parse(handle): + ... print("%s %s" % (record.entry, record.name[0])) + ... + 1.1.1.1 alcohol dehydrogenase + 1.1.1.62 17beta-estradiol 17-dehydrogenase + 1.1.1.68 Transferred to 1.5.1.20 + 1.6.5.3 NADH:ubiquinone reductase (H+-translocating) + 1.14.13.28 3,9-dihydroxypterocarpan 6a-monooxygenase + 2.4.1.68 glycoprotein 6-alpha-L-fucosyltransferase + 3.1.1.6 acetylesterase + 2.7.2.1 acetate kinase + + """ + record = Record() + for line in handle: + if line[:3] == "///": + yield record + record = Record() + continue + if line[:12] != " ": + keyword = line[:12] + data = line[12:].strip() + if keyword == "ENTRY ": + words = data.split() + record.entry = words[1] + elif keyword == "CLASS ": + record.classname.append(data) + elif keyword == "COFACTOR ": + record.cofactor.append(data) + elif keyword == "COMMENT ": + record.comment.append(data) + elif keyword == "DBLINKS ": + if ":" in data: + key, values = data.split(":") + values = values.split() + row = (key, values) + record.dblinks.append(row) + else: + row = record.dblinks[-1] + key, values = row + values.extend(data.split()) + row = key, values + record.dblinks[-1] = row + elif keyword == "DISEASE ": + if ":" in data: + database, data = data.split(":") + number, name = data.split(None, 1) + row = (database, number, name) + record.disease.append(row) + else: + row = record.disease[-1] + database, number, name = row + name = name + " " + data + row = database, number, name + record.disease[-1] = row + elif keyword == "EFFECTOR ": + record.effector.append(data.strip(";")) + elif keyword == "GENES ": + if data[3:5] == ": " or data[4:6] == ": ": + key, values = data.split(":", 1) + values = [value.split("(")[0] for value in values.split()] + row = (key, values) + record.genes.append(row) + else: + row = record.genes[-1] + key, values = row + for value in data.split(): + value = value.split("(")[0] + values.append(value) + row = key, values + record.genes[-1] = row + elif keyword == "INHIBITOR ": + record.inhibitor.append(data.strip(";")) + elif keyword == "NAME ": + record.name.append(data.strip(";")) + elif keyword == "PATHWAY ": + if data[:5] == "PATH:": + _, map_num, name = data.split(None, 2) + pathway = ("PATH", map_num, name) + record.pathway.append(pathway) + else: + ec_num, name = data.split(None, 1) + pathway = "PATH", ec_num, name + record.pathway.append(pathway) + elif keyword == "PRODUCT ": + record.product.append(data.strip(";")) + elif keyword == "REACTION ": + record.reaction.append(data.strip(";")) + elif keyword == "STRUCTURES ": + if data[:4] == "PDB:": + database = data[:3] + accessions = data[4:].split() + row = (database, accessions) + record.structures.append(row) + else: + row = record.structures[-1] + database, accessions = row + accessions.extend(data.split()) + row = (database, accessions) + record.structures[-1] = row + elif keyword == "SUBSTRATE ": + record.substrate.append(data.strip(";")) + elif keyword == "SYSNAME ": + record.sysname.append(data.strip(";")) + + +def read(handle): + """Parse a KEGG Enzyme file with exactly one entry. 
+ + If the handle contains no records, or more than one record, + an exception is raised. For example: + + >>> with open("KEGG/enzyme.new") as handle: + ... record = read(handle) + ... print("%s %s" % (record.entry, record.name[0])) + ... + 6.2.1.25 benzoate---CoA ligase + """ + records = parse(handle) + try: + record = next(records) + except StopIteration: + raise ValueError("No records found in handle") from None + try: + next(records) + raise ValueError("More than one record found in handle") + except StopIteration: + pass + return record + + +if __name__ == "__main__": + from Bio._utils import run_doctest + + run_doctest() diff --git a/code/lib/Bio/KEGG/Enzyme/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/KEGG/Enzyme/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..b6c2c5f Binary files /dev/null and b/code/lib/Bio/KEGG/Enzyme/__pycache__/__init__.cpython-37.pyc differ diff --git a/code/lib/Bio/KEGG/Gene/__init__.py b/code/lib/Bio/KEGG/Gene/__init__.py new file mode 100644 index 0000000..8ffe5c2 --- /dev/null +++ b/code/lib/Bio/KEGG/Gene/__init__.py @@ -0,0 +1,140 @@ +# Copyright 2017 by Kozo Nishida. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. + +"""Code to work with the KEGG Gene database. + +Functions: +- parse - Returns an iterator giving Record objects. + +Classes: +- Record - A representation of a KEGG Gene. + +""" + + +from Bio.KEGG import _default_wrap, _wrap_kegg, _write_kegg + + +# Set up line wrapping rules (see Bio.KEGG._wrap_kegg) +name_wrap = [0, "", (" ", "$", 1, 1), ("-", "$", 1, 1)] +id_wrap = _default_wrap + + +class Record: + """Holds info from a KEGG Gene record. + + Attributes: + - entry The entry identifier. + - name A list of the gene names. + - definition The definition for the gene. + - orthology A list of 2-tuples: (orthology id, role) + - organism A tuple: (organism id, organism) + - position The position for the gene + - motif A list of 2-tuples: (database, list of link ids) + - dblinks A list of 2-tuples: (database, list of link ids) + + """ + + def __init__(self): + """Initialize new record.""" + self.entry = "" + self.name = [] + self.definition = "" + self.orthology = [] + self.organism = "" + self.position = "" + self.motif = [] + self.dblinks = [] + + def __str__(self): + """Return a string representation of this Record.""" + return self._entry() + self._name() + self._dblinks() + "///" + + def _entry(self): + return _write_kegg("ENTRY", [self.entry]) + + def _name(self): + return _write_kegg( + "NAME", [_wrap_kegg(l, wrap_rule=name_wrap) for l in self.name] + ) + + def _definition(self): + return _write_kegg("DEFINITION", [self.definition]) + + def _dblinks(self): + s = [] + for entry in self.dblinks: + s.append(entry[0] + ": " + " ".join(entry[1])) + return _write_kegg("DBLINKS", [_wrap_kegg(l, wrap_rule=id_wrap(9)) for l in s]) + + +def parse(handle): + """Parse a KEGG Gene file, returning Record objects. + + This is an iterator function, typically used in a for loop. For + example, using one of the example KEGG files in the Biopython + test suite, + + >>> with open("KEGG/gene.sample") as handle: + ... for record in parse(handle): + ... print("%s %s" % (record.entry, record.name[0])) + ... 
+ b1174 minE + b1175 minD + + + """ + record = Record() + for line in handle: + if line[:3] == "///": + yield record + record = Record() + continue + if line[:12] != " ": + keyword = line[:12] + data = line[12:].strip() + if keyword == "ENTRY ": + words = data.split() + record.entry = words[0] + elif keyword == "NAME ": + data = data.strip(";") + record.name.append(data) + elif keyword == "DEFINITION ": + record.definition = data + elif keyword == "ORTHOLOGY ": + id, name = data.split(" ") + orthology = (id, name) + record.orthology.append(orthology) + elif keyword == "ORGANISM ": + id, name = data.split(" ") + organism = (id, name) + record.organism = organism + elif keyword == "POSITION ": + record.position = data + elif keyword == "MOTIF ": + key, values = data.split(": ") + values = values.split() + row = (key, values) + record.motif.append(row) + elif keyword == "DBLINKS ": + if ":" in data: + key, values = data.split(": ") + values = values.split() + row = (key, values) + record.dblinks.append(row) + else: + row = record.dblinks[-1] + key, values = row + values.extend(data.split()) + row = key, values + record.dblinks[-1] = row + + +if __name__ == "__main__": + from Bio._utils import run_doctest + + run_doctest() diff --git a/code/lib/Bio/KEGG/Gene/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/KEGG/Gene/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..efa0935 Binary files /dev/null and b/code/lib/Bio/KEGG/Gene/__pycache__/__init__.cpython-37.pyc differ diff --git a/code/lib/Bio/KEGG/KGML/KGML_parser.py b/code/lib/Bio/KEGG/KGML/KGML_parser.py new file mode 100644 index 0000000..6405ce3 --- /dev/null +++ b/code/lib/Bio/KEGG/KGML/KGML_parser.py @@ -0,0 +1,189 @@ +# Copyright 2013 by Leighton Pritchard. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. + +"""Classes and functions to parse a KGML pathway map. + +The KGML pathway map is parsed into the object structure defined in +KGML_Pathway.py in this module. + +Classes: + - KGMLParser - Parses KGML file + +Functions: + - read - Returns a single Pathway object, using KGMLParser internally + +""" + +from xml.etree import ElementTree + +from io import StringIO + +from Bio.KEGG.KGML.KGML_pathway import Component, Entry, Graphics +from Bio.KEGG.KGML.KGML_pathway import Pathway, Reaction, Relation + + +def read(handle): + """Parse a single KEGG Pathway from given file handle. + + Returns a single Pathway object. There should be one and only + one pathway in each file, but there may well be pathological + examples out there. + """ + pathways = parse(handle) + try: + pathway = next(pathways) + except StopIteration: + raise ValueError("No pathways found in handle") from None + try: + next(pathways) + raise ValueError("More than one pathway found in handle") + except StopIteration: + pass + return pathway + + +def parse(handle): + """Return an iterator over Pathway elements. + + Arguments: + - handle - file handle to a KGML file for parsing, or a KGML string + + This is a generator for the return of multiple Pathway objects. 
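A usage sketch for `read` and `parse` (the file names are illustrative):

```python
from Bio.KEGG.KGML.KGML_parser import read, parse

# read() expects exactly one pathway in the source:
with open("ko00010.xml") as handle:
    pathway = read(handle)
print(pathway.title)

# parse() also accepts a KGML string and can yield several pathways:
with open("ko_maps.xml") as handle:
    for pathway in parse(handle):
        print(pathway.name, len(pathway.entries))
```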
+ + """ + # Check handle + try: + handle.read(0) + except AttributeError: + try: + handle = StringIO(handle) + except TypeError: + raise TypeError( + "An XML-containing handle or an XML string must be provided" + ) from None + # Parse XML and return each Pathway + for event, elem in ElementTree.iterparse(handle, events=("start", "end")): + if event == "end" and elem.tag == "pathway": + yield KGMLParser(elem).parse() + elem.clear() + + +class KGMLParser: + """Parses a KGML XML Pathway entry into a Pathway object. + + Example: Read and parse large metabolism file + + >>> from Bio.KEGG.KGML.KGML_parser import read + >>> pathway = read(open('KEGG/ko01100.xml', 'r')) + >>> print(len(pathway.entries)) + 3628 + >>> print(len(pathway.reactions)) + 1672 + >>> print(len(pathway.maps)) + 149 + + >>> pathway = read(open('KEGG/ko00010.xml', 'r')) + >>> print(pathway) #doctest: +NORMALIZE_WHITESPACE + Pathway: Glycolysis / Gluconeogenesis + KEGG ID: path:ko00010 + Image file: http://www.kegg.jp/kegg/pathway/ko/ko00010.png + Organism: ko + Entries: 99 + Entry types: + ortholog: 61 + compound: 31 + map: 7 + + """ + + def __init__(self, elem): + """Initialize the class.""" + self.entry = elem + + def parse(self): + """Parse the input elements.""" + + def _parse_pathway(attrib): + for k, v in attrib.items(): + self.pathway.__setattr__(k, v) + + def _parse_entry(element): + new_entry = Entry() + for k, v in element.attrib.items(): + new_entry.__setattr__(k, v) + for subelement in element: + if subelement.tag == "graphics": + _parse_graphics(subelement, new_entry) + elif subelement.tag == "component": + _parse_component(subelement, new_entry) + self.pathway.add_entry(new_entry) + + def _parse_graphics(element, entry): + new_graphics = Graphics(entry) + for k, v in element.attrib.items(): + new_graphics.__setattr__(k, v) + entry.add_graphics(new_graphics) + + def _parse_component(element, entry): + new_component = Component(entry) + for k, v in element.attrib.items(): + new_component.__setattr__(k, v) + entry.add_component(new_component) + + def _parse_reaction(element): + new_reaction = Reaction() + for k, v in element.attrib.items(): + new_reaction.__setattr__(k, v) + for subelement in element: + if subelement.tag == "substrate": + new_reaction.add_substrate(int(subelement.attrib["id"])) + elif subelement.tag == "product": + new_reaction.add_product(int(subelement.attrib["id"])) + self.pathway.add_reaction(new_reaction) + + def _parse_relation(element): + new_relation = Relation() + new_relation.entry1 = int(element.attrib["entry1"]) + new_relation.entry2 = int(element.attrib["entry2"]) + new_relation.type = element.attrib["type"] + for subtype in element: + name, value = subtype.attrib["name"], subtype.attrib["value"] + if name in ("compound", "hidden compound"): + new_relation.subtypes.append((name, int(value))) + else: + new_relation.subtypes.append((name, value)) + self.pathway.add_relation(new_relation) + + # ========== + # Initialize Pathway + self.pathway = Pathway() + # Get information about the pathway itself + _parse_pathway(self.entry.attrib) + for element in self.entry: + if element.tag == "entry": + _parse_entry(element) + elif element.tag == "reaction": + _parse_reaction(element) + elif element.tag == "relation": + _parse_relation(element) + # Parsing of some elements not implemented - no examples yet + else: + # This should warn us of any unimplemented tags + import warnings + from Bio import BiopythonParserWarning + + warnings.warn( + "Warning: tag %s not implemented in parser" % element.tag, 
+ BiopythonParserWarning, + ) + return self.pathway + + +if __name__ == "__main__": + from Bio._utils import run_doctest + + run_doctest(verbose=0) diff --git a/code/lib/Bio/KEGG/KGML/KGML_pathway.py b/code/lib/Bio/KEGG/KGML/KGML_pathway.py new file mode 100644 index 0000000..12dd8aa --- /dev/null +++ b/code/lib/Bio/KEGG/KGML/KGML_pathway.py @@ -0,0 +1,859 @@ +# Copyright 2013 by Leighton Pritchard. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. + +"""Classes to represent a KGML Pathway Map. + +The KGML definition is as of release KGML v0.7.2 +(http://www.kegg.jp/kegg/xml/docs/) + +Classes: + - Pathway - Specifies graph information for the pathway map + - Relation - Specifies a relationship between two proteins or KOs, + or protein and compound. There is an implied direction to the + relationship in some cases. + - Reaction - A specific chemical reaction between a substrate and + a product. + - Entry - A node in the pathway graph + - Graphics - Entry subelement describing its visual representation + +""" + +import time +from itertools import chain +from xml.dom import minidom +import xml.etree.ElementTree as ET + + +# Pathway +class Pathway: + """Represents a KGML pathway from KEGG. + + Specifies graph information for the pathway map, as described in + release KGML v0.7.2 (http://www.kegg.jp/kegg/xml/docs/) + + Attributes: + - name - KEGGID of the pathway map + - org - ko/ec/[org prefix] + - number - map number (integer) + - title - the map title + - image - URL of the image map for the pathway + - link - URL of information about the pathway + - entries - Dictionary of entries in the pathway, keyed by node ID + - reactions - Set of reactions in the pathway + + The name attribute has a restricted format, so we make it a property and + enforce the formatting. + + The Pathway object is the only allowed route for adding/removing + Entry, Reaction, or Relation elements. + + Entries are held in a dictionary and keyed by the node ID for the + pathway graph - this allows for ready access via the Reaction/Relation + etc. elements. Entries must be added before reference by any other + element. + + Reactions are held in a dictionary, keyed by node ID for the path. + The elements referred to in the reaction must be added before the + reaction itself. 
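The ordering contract described above matters in practice: entries must be registered before anything that refers to them. A sketch, assuming `Entry` and `Reaction` accept integer node IDs (which the `add_*` checks below enforce; the IDs here are illustrative):

```python
from Bio.KEGG.KGML.KGML_pathway import Entry, Pathway, Reaction

pathway = Pathway()

entry = Entry()
entry.id = 1                    # integer node ID, as add_entry() requires
pathway.add_entry(entry)        # the entry must be registered first...

reaction = Reaction()
reaction.id = 1
pathway.add_reaction(reaction)  # ...before a reaction can reference its ID
```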
+ + """ + + def __init__(self): + """Initialize the class.""" + self._name = "" + self.org = "" + self._number = None + self.title = "" + self.image = "" + self.link = "" + self.entries = {} + self._reactions = {} + self._relations = set() + + def get_KGML(self): + """Return the pathway as a string in prettified KGML format.""" + header = "\n".join( + [ + '', + "', + "" % time.asctime(), + ] + ) + rough_xml = header + ET.tostring(self.element, "utf-8").decode() + reparsed = minidom.parseString(rough_xml) + return reparsed.toprettyxml(indent=" ") + + def add_entry(self, entry): + """Add an Entry element to the pathway.""" + # We insist that the node ID is an integer + if not isinstance(entry.id, int): + raise TypeError( + "Node ID must be an integer, got %s (%s)" % (type(entry.id), entry.id) + ) + entry._pathway = self # Let the entry know about the pathway + self.entries[entry.id] = entry + + def remove_entry(self, entry): + """Remove an Entry element from the pathway.""" + if not isinstance(entry.id, int): + raise TypeError( + "Node ID must be an integer, got %s (%s)" % (type(entry.id), entry.id) + ) + # We need to remove the entry from any other elements that may + # contain it, which means removing those elements + # TODO + del self.entries[entry.id] + + def add_reaction(self, reaction): + """Add a Reaction element to the pathway.""" + # We insist that the node ID is an integer and corresponds to an entry + if not isinstance(reaction.id, int): + raise ValueError( + "Node ID must be an integer, got %s (%s)" + % (type(reaction.id), reaction.id) + ) + if reaction.id not in self.entries: + raise ValueError("Reaction ID %d has no corresponding entry" % reaction.id) + reaction._pathway = self # Let the reaction know about the pathway + self._reactions[reaction.id] = reaction + + def remove_reaction(self, reaction): + """Remove a Reaction element from the pathway.""" + if not isinstance(reaction.id, int): + raise TypeError( + "Node ID must be an integer, got %s (%s)" + % (type(reaction.id), reaction.id) + ) + # We need to remove the reaction from any other elements that may + # contain it, which means removing those elements + # TODO + del self._reactions[reaction.id] + + def add_relation(self, relation): + """Add a Relation element to the pathway.""" + relation._pathway = self # Let the reaction know about the pathway + self._relations.add(relation) + + def remove_relation(self, relation): + """Remove a Relation element from the pathway.""" + self._relations.remove(relation) + + def __str__(self): + """Return a readable summary description string.""" + outstr = [ + "Pathway: %s" % self.title, + "KEGG ID: %s" % self.name, + "Image file: %s" % self.image, + "Organism: %s" % self.org, + "Entries: %d" % len(self.entries), + "Entry types:", + ] + for t in ["ortholog", "enzyme", "reaction", "gene", "group", "compound", "map"]: + etype = [e for e in self.entries.values() if e.type == t] + if len(etype): + outstr.append("\t%s: %d" % (t, len(etype))) + return "\n".join(outstr) + "\n" + + # Assert correct formatting of the pathway name, and other attributes + def _getname(self): + return self._name + + def _setname(self, value): + if not value.startswith("path:"): + raise ValueError("Pathway name should begin with 'path:', got %s" % value) + self._name = value + + def _delname(self): + del self._name + + name = property(_getname, _setname, _delname, "The KEGGID for the pathway map.") + + def _getnumber(self): + return self._number + + def _setnumber(self, value): + self._number = int(value) + + def 
_delnumber(self): + del self._number + + number = property(_getnumber, _setnumber, _delnumber, "The KEGG map number.") + + @property + def compounds(self): + """Get a list of entries of type compound.""" + return [e for e in self.entries.values() if e.type == "compound"] + + @property + def maps(self): + """Get a list of entries of type map.""" + return [e for e in self.entries.values() if e.type == "map"] + + @property + def orthologs(self): + """Get a list of entries of type ortholog.""" + return [e for e in self.entries.values() if e.type == "ortholog"] + + @property + def genes(self): + """Get a list of entries of type gene.""" + return [e for e in self.entries.values() if e.type == "gene"] + + @property + def reactions(self): + """Get a list of reactions in the pathway.""" + return self._reactions.values() + + @property + def reaction_entries(self): + """List of entries corresponding to each reaction in the pathway.""" + return [self.entries[i] for i in self._reactions] + + @property + def relations(self): + """Get a list of relations in the pathway.""" + return list(self._relations) + + @property + def element(self): + """Return the Pathway as a valid KGML element.""" + # The root is this Pathway element + pathway = ET.Element("pathway") + pathway.attrib = { + "name": self._name, + "org": self.org, + "number": str(self._number), + "title": self.title, + "image": self.image, + "link": self.link, + } + # We add the Entries in node ID order + for eid, entry in sorted(self.entries.items()): + pathway.append(entry.element) + # Next we add Relations + for relation in self._relations: + pathway.append(relation.element) + for eid, reaction in sorted(self._reactions.items()): + pathway.append(reaction.element) + return pathway + + @property + def bounds(self): + """Coordinate bounds for all Graphics elements in the Pathway. + + Returns the [(xmin, ymin), (xmax, ymax)] coordinates for all + Graphics elements in the Pathway + """ + xlist, ylist = [], [] + for b in [g.bounds for g in self.entries.values()]: + xlist.extend([b[0][0], b[1][0]]) + ylist.extend([b[0][1], b[1][1]]) + return [(min(xlist), min(ylist)), (max(xlist), max(ylist))] + + +# Entry +class Entry: + """Represent an Entry from KGML. 
+ + Each Entry element is a node in the pathway graph, as described in + release KGML v0.7.2 (http://www.kegg.jp/kegg/xml/docs/) + + Attributes: + - id - The ID of the entry in the pathway map (integer) + - names - List of KEGG IDs for the entry + - type - The type of the entry + - link - URL of information about the entry + - reaction - List of KEGG IDs of the corresponding reactions + (integer) + - graphics - List of Graphics objects describing the Entry's visual + representation + - components - List of component node ID for this Entry ('group') + - alt - List of alternate names for the Entry + + NOTE: The alt attribute represents a subelement of the substrate and + product elements in the KGML file + + """ + + def __init__(self): + """Initialize the class.""" + self._id = None + self._names = [] + self.type = "" + self.image = "" + self.link = "" + self.graphics = [] + self.components = set() + self.alt = [] + self._pathway = None + self._reactions = [] + + def __str__(self): + """Return readable descriptive string.""" + outstr = [ + "Entry node ID: %d" % self.id, + "Names: %s" % self.name, + "Type: %s" % self.type, + "Components: %s" % self.components, + "Reactions: %s" % self.reaction, + "Graphics elements: %d %s" % (len(self.graphics), self.graphics), + ] + return "\n".join(outstr) + "\n" + + def add_component(self, element): + """Add an element to the entry. + + If the Entry is already part of a pathway, make sure + the component already exists. + """ + if self._pathway is not None: + if element.id not in self._pathway.entries: + raise ValueError( + "Component %s is not an entry in the pathway" % element.id + ) + self.components.add(element) + + def remove_component(self, value): + """Remove the entry with the passed ID from the group.""" + self.components.remove(value) + + def add_graphics(self, entry): + """Add the Graphics entry.""" + self.graphics.append(entry) + + def remove_graphics(self, entry): + """Remove the Graphics entry with the passed ID from the group.""" + self.graphics.remove(entry) + + # Names may be given as a space-separated list of KEGG identifiers + def _getname(self): + return " ".join(self._names) + + def _setname(self, value): + self._names = value.split() + + def _delname(self): + self._names = [] + + name = property( + _getname, _setname, _delname, "List of KEGG identifiers for the Entry." 
+ ) + + # Reactions may be given as a space-separated list of KEGG identifiers + def _getreaction(self): + return " ".join(self._reactions) + + def _setreaction(self, value): + self._reactions = value.split() + + def _delreaction(self): + self._reactions = [] + + reaction = property( + _getreaction, + _setreaction, + _delreaction, + "List of reaction KEGG IDs for this Entry.", + ) + + # We make sure that the node ID is an integer + def _getid(self): + return self._id + + def _setid(self, value): + self._id = int(value) + + def _delid(self): + del self._id + + id = property(_getid, _setid, _delid, "The pathway graph node ID for the Entry.") + + @property + def element(self): + """Return the Entry as a valid KGML element.""" + # The root is this Entry element + entry = ET.Element("entry") + entry.attrib = { + "id": str(self._id), + "name": self.name, + "link": self.link, + "type": self.type, + } + if len(self._reactions): + entry.attrib["reaction"] = self.reaction + if len(self.graphics): + for g in self.graphics: + entry.append(g.element) + if len(self.components): + for c in self.components: + entry.append(c.element) + return entry + + @property + def bounds(self): + """Coordinate bounds for all Graphics elements in the Entry. + + Return the [(xmin, ymin), (xmax, ymax)] co-ordinates for the Entry + Graphics elements. + """ + xlist, ylist = [], [] + for b in [g.bounds for g in self.graphics]: + xlist.extend([b[0][0], b[1][0]]) + ylist.extend([b[0][1], b[1][1]]) + return [(min(xlist), min(ylist)), (max(xlist), max(ylist))] + + @property + def is_reactant(self): + """Return true if this Entry participates in any reaction in its parent pathway.""" + for rxn in self._pathway.reactions: + if self._id in rxn.reactant_ids: + return True + return False + + +# Component +class Component: + """An Entry subelement used to represents a complex node. + + A subelement of the Entry element, used when the Entry is a complex + node, as described in release KGML v0.7.2 + (http://www.kegg.jp/kegg/xml/docs/) + + The Component acts as a collection (with type 'group', and typically + its own Graphics subelement), having only an ID. + """ + + def __init__(self, parent): + """Initialize the class.""" + self._id = None + self._parent = parent + + # We make sure that the node ID is an integer + def _getid(self): + return self._id + + def _setid(self, value): + self._id = int(value) + + def _delid(self): + del self._id + + id = property(_getid, _setid, _delid, "The pathway graph node ID for the Entry") + + @property + def element(self): + """Return the Component as a valid KGML element.""" + # The root is this Component element + component = ET.Element("component") + component.attrib = {"id": str(self._id)} + return component + + +# Graphics +class Graphics: + """An Entry subelement used to represents the visual representation. + + A subelement of Entry, specifying its visual representation, as + described in release KGML v0.7.2 (http://www.kegg.jp/kegg/xml/docs/) + + Attributes: + - name Label for the graphics object + - x X-axis position of the object (int) + - y Y-axis position of the object (int) + - coords polyline co-ordinates, list of (int, int) tuples + - type object shape + - width object width (int) + - height object height (int) + - fgcolor object foreground color (hex RGB) + - bgcolor object background color (hex RGB) + + Some attributes are present only for specific graphics types. For + example, line types do not (typically) have a width. 
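+
+    For example (a sketch; ``parent_entry`` stands in for an existing Entry
+    object, and the string values mimic raw KGML attribute values, which
+    the property setters below coerce to numeric types):
+
+        g = Graphics(parent_entry)
+        g.x, g.y = "100", "50"           # stored as floats 100.0 and 50.0
+        g.width, g.height = "46", "17"
+        g.coords = "10,20,30,40"         # parsed to [(10, 20), (30, 40)]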
+ We permit non-DTD attributes and attribute settings, such as + + dash List of ints, describing an on/off pattern for dashes + + """ + + def __init__(self, parent): + """Initialize the class.""" + self.name = "" + self._x = None + self._y = None + self._coords = None + self.type = "" + self._width = None + self._height = None + self.fgcolor = "" + self.bgcolor = "" + self._parent = parent + + # We make sure that the XY coordinates, width and height are numbers + def _getx(self): + return self._x + + def _setx(self, value): + self._x = float(value) + + def _delx(self): + del self._x + + x = property(_getx, _setx, _delx, "The X coordinate for the graphics element.") + + def _gety(self): + return self._y + + def _sety(self, value): + self._y = float(value) + + def _dely(self): + del self._y + + y = property(_gety, _sety, _dely, "The Y coordinate for the graphics element.") + + def _getwidth(self): + return self._width + + def _setwidth(self, value): + self._width = float(value) + + def _delwidth(self): + del self._width + + width = property( + _getwidth, _setwidth, _delwidth, "The width of the graphics element." + ) + + def _getheight(self): + return self._height + + def _setheight(self, value): + self._height = float(value) + + def _delheight(self): + del self._height + + height = property( + _getheight, _setheight, _delheight, "The height of the graphics element." + ) + + # We make sure that the polyline co-ordinates are integers, too + def _getcoords(self): + return self._coords + + def _setcoords(self, value): + clist = [int(e) for e in value.split(",")] + self._coords = [tuple(clist[i : i + 2]) for i in range(0, len(clist), 2)] + + def _delcoords(self): + del self._coords + + coords = property( + _getcoords, + _setcoords, + _delcoords, + "Polyline coordinates for the graphics element.", + ) + + # Set default colors + def _getfgcolor(self): + return self._fgcolor + + def _setfgcolor(self, value): + if value == "none": + self._fgcolor = "#000000" # this default defined in KGML spec + else: + self._fgcolor = value + + def _delfgcolor(self): + del self._fgcolor + + fgcolor = property(_getfgcolor, _setfgcolor, _delfgcolor, "Foreground color.") + + def _getbgcolor(self): + return self._bgcolor + + def _setbgcolor(self, value): + if value == "none": + self._bgcolor = "#000000" # this default defined in KGML spec + else: + self._bgcolor = value + + def _delbgcolor(self): + del self._bgcolor + + bgcolor = property(_getbgcolor, _setbgcolor, _delbgcolor, "Background color.") + + @property + def element(self): + """Return the Graphics as a valid KGML element.""" + # The root is this Component element + graphics = ET.Element("graphics") + if isinstance(self.fgcolor, str): # Assumes that string is hexstring + fghex = self.fgcolor + else: # Assumes ReportLab Color object + fghex = "#" + self.fgcolor.hexval()[2:] + if isinstance(self.bgcolor, str): # Assumes that string is hexstring + bghex = self.bgcolor + else: # Assumes ReportLab Color object + bghex = "#" + self.bgcolor.hexval()[2:] + graphics.attrib = { + "name": self.name, + "type": self.type, + "fgcolor": fghex, + "bgcolor": bghex, + } + for (n, attr) in [ + ("x", "_x"), + ("y", "_y"), + ("width", "_width"), + ("height", "_height"), + ]: + if getattr(self, attr) is not None: + graphics.attrib[n] = str(getattr(self, attr)) + if self.type == "line": # Need to write polycoords + graphics.attrib["coords"] = ",".join( + [str(e) for e in chain.from_iterable(self.coords)] + ) + return graphics + + @property + def bounds(self): + """Coordinate bounds 
for the Graphics element. + + Return the bounds of the Graphics object as an [(xmin, ymin), + (xmax, ymax)] tuple. Co-ordinates give the centre of the + circle, rectangle, roundrectangle elements, so we have to + adjust for the relevant width/height. + """ + if self.type == "line": + xlist = [x for x, y in self.coords] + ylist = [y for x, y in self.coords] + return [(min(xlist), min(ylist)), (max(xlist), max(ylist))] + else: + return [ + (self.x - self.width * 0.5, self.y - self.height * 0.5), + (self.x + self.width * 0.5, self.y + self.height * 0.5), + ] + + @property + def centre(self): + """Return the centre of the Graphics object as an (x, y) tuple.""" + return ( + 0.5 * (self.bounds[0][0] + self.bounds[1][0]), + 0.5 * (self.bounds[0][1] + self.bounds[1][1]), + ) + + +# Reaction +class Reaction: + """A specific chemical reaction with substrates and products. + + This describes a specific chemical reaction between one or more + substrates and one or more products. + + Attributes: + - id Pathway graph node ID of the entry + - names List of KEGG identifier(s) from the REACTION database + - type String: reversible or irreversible + - substrate Entry object of the substrate + - product Entry object of the product + + """ + + def __init__(self): + """Initialize the class.""" + self._id = None + self._names = [] + self.type = "" + self._substrates = set() + self._products = set() + self._pathway = None + + def __str__(self): + """Return an informative human-readable string.""" + outstr = [ + "Reaction node ID: %s" % self.id, + "Reaction KEGG IDs: %s" % self.name, + "Type: %s" % self.type, + "Substrates: %s" % ",".join([s.name for s in self.substrates]), + "Products: %s" % ",".join([s.name for s in self.products]), + ] + return "\n".join(outstr) + "\n" + + def add_substrate(self, substrate_id): + """Add a substrate, identified by its node ID, to the reaction.""" + if self._pathway is not None: + if int(substrate_id) not in self._pathway.entries: + raise ValueError( + "Couldn't add substrate, no node ID %d in Pathway" + % int(substrate_id) + ) + self._substrates.add(substrate_id) + + def add_product(self, product_id): + """Add a product, identified by its node ID, to the reaction.""" + if self._pathway is not None: + if int(product_id) not in self._pathway.entries: + raise ValueError( + "Couldn't add product, no node ID %d in Pathway" % product_id + ) + self._products.add(int(product_id)) + + # The node ID is also the node ID of the Entry that corresponds to the + # reaction; we get the corresponding Entry when there is an associated + # Pathway + def _getid(self): + return self._id + + def _setid(self, value): + self._id = int(value) + + def _delid(self): + del self._id + + id = property(_getid, _setid, _delid, "Node ID for the reaction.") + + # Names may show up as a space-separated list of several KEGG identifiers + def _getnames(self): + return " ".join(self._names) + + def _setnames(self, value): + self._names.extend(value.split()) + + def _delnames(self): + del self.names + + name = property( + _getnames, _setnames, _delnames, "List of KEGG identifiers for the reaction." 
+ ) + + # products and substrates are read-only properties, returning lists + # of Entry objects + @property + def substrates(self): + """Return list of substrate Entry elements.""" + return [self._pathway.entries[sid] for sid in self._substrates] + + @property + def products(self): + """Return list of product Entry elements.""" + return [self._pathway.entries[pid] for pid in self._products] + + @property + def entry(self): + """Return the Entry corresponding to this reaction.""" + return self._pathway.entries[self._id] + + @property + def reactant_ids(self): + """Return a list of substrate and product reactant IDs.""" + return self._products.union(self._substrates) + + @property + def element(self): + """Return KGML element describing the Reaction.""" + # The root is this Relation element + reaction = ET.Element("reaction") + reaction.attrib = {"id": str(self.id), "name": self.name, "type": self.type} + for s in self._substrates: + substrate = ET.Element("substrate") + substrate.attrib["id"] = str(s) + substrate.attrib["name"] = self._pathway.entries[s].name + reaction.append(substrate) + for p in self._products: + product = ET.Element("product") + product.attrib["id"] = str(p) + product.attrib["name"] = self._pathway.entries[p].name + reaction.append(product) + return reaction + + +# Relation +class Relation: + """A relationship between to products, KOs, or protein and compound. + + This describes a relationship between two products, KOs, or protein + and compound, as described in release KGML v0.7.2 + (http://www.kegg.jp/kegg/xml/docs/) + + Attributes: + - entry1 - The first Entry object node ID defining the + relation (int) + - entry2 - The second Entry object node ID defining the + relation (int) + - type - The relation type + - subtypes - List of subtypes for the relation, as a list of + (name, value) tuples + + """ + + def __init__(self): + """Initialize the class.""" + self._entry1 = None + self._entry2 = None + self.type = "" + self.subtypes = [] + self._pathway = None + + def __str__(self): + """Return a useful human-readable string.""" + outstr = [ + "Relation (subtypes: %d):" % len(self.subtypes), + "Entry1:", + str(self.entry1), + "Entry2:", + str(self.entry2), + ] + for s in self.subtypes: + outstr.extend(["Subtype: %s" % s[0], str(s[1])]) + return "\n".join(outstr) + + # Properties entry1 and entry2 + def _getentry1(self): + if self._pathway is not None: + return self._pathway.entries[self._entry1] + return self._entry1 + + def _setentry1(self, value): + self._entry1 = int(value) + + def _delentry1(self): + del self._entry1 + + entry1 = property(_getentry1, _setentry1, _delentry1, "Entry1 of the relation.") + + def _getentry2(self): + if self._pathway is not None: + return self._pathway.entries[self._entry2] + return self._entry2 + + def _setentry2(self, value): + self._entry2 = int(value) + + def _delentry2(self): + del self._entry2 + + entry2 = property(_getentry2, _setentry2, _delentry2, "Entry2 of the relation.") + + @property + def element(self): + """Return KGML element describing the Relation.""" + # The root is this Relation element + relation = ET.Element("relation") + relation.attrib = { + "entry1": str(self._entry1), + "entry2": str(self._entry2), + "type": self.type, + } + for (name, value) in self.subtypes: + subtype = ET.Element("subtype") + subtype.attrib = {"name": name, "value": str(value)} + relation.append(subtype) + return relation diff --git a/code/lib/Bio/KEGG/KGML/__init__.py b/code/lib/Bio/KEGG/KGML/__init__.py new file mode 100644 index 
0000000..9063911 --- /dev/null +++ b/code/lib/Bio/KEGG/KGML/__init__.py @@ -0,0 +1,16 @@ +# Copyright 2013 by Leighton Pritchard. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. + +"""Code to work with data from the KEGG database. + +References: +Kanehisa, M. and Goto, S.; KEGG: Kyoto Encyclopedia of Genes and Genomes. +Nucleic Acids Res. 28, 29-34 (2000). + +URL: http://www.genome.ad.jp/kegg/ + +""" diff --git a/code/lib/Bio/KEGG/KGML/__pycache__/KGML_parser.cpython-37.pyc b/code/lib/Bio/KEGG/KGML/__pycache__/KGML_parser.cpython-37.pyc new file mode 100644 index 0000000..9ed45a0 Binary files /dev/null and b/code/lib/Bio/KEGG/KGML/__pycache__/KGML_parser.cpython-37.pyc differ diff --git a/code/lib/Bio/KEGG/KGML/__pycache__/KGML_pathway.cpython-37.pyc b/code/lib/Bio/KEGG/KGML/__pycache__/KGML_pathway.cpython-37.pyc new file mode 100644 index 0000000..687a3d2 Binary files /dev/null and b/code/lib/Bio/KEGG/KGML/__pycache__/KGML_pathway.cpython-37.pyc differ diff --git a/code/lib/Bio/KEGG/KGML/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/KEGG/KGML/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..236575f Binary files /dev/null and b/code/lib/Bio/KEGG/KGML/__pycache__/__init__.cpython-37.pyc differ diff --git a/code/lib/Bio/KEGG/Map/__init__.py b/code/lib/Bio/KEGG/Map/__init__.py new file mode 100644 index 0000000..e1b37f7 --- /dev/null +++ b/code/lib/Bio/KEGG/Map/__init__.py @@ -0,0 +1,49 @@ +# Copyright 2001 by Tarjei Mikkelsen. All rights reserved. +# Copyright 2007 by Michiel de Hoon. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. + +"""Load KEGG Pathway maps for use with the Biopython Pathway module. + +The pathway maps are in the format:: + + RXXXXX:[X.X.X.X:] A + 2 B <=> C + RXXXXX:[X.X.X.X:] 3C <=> 2 D + E + ... + +where RXXXXX is a five-digit reaction id, and X.X.X.X is the optional +EC number of the enzyme that catalyze the reaction. 
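+
+For example (an illustrative row, not taken from a real map file), the line::
+
+    R00001:1.2.3.4: A + 2 B <=> C
+
+parses to a Bio.Pathway.Reaction with reactants {'A': -1, 'B': -2, 'C': 1},
+catalysts [('1.2.3.4',)], and 'R00001' as its data; negative counts denote
+consumed substrates.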
+""" + +from Bio.Pathway import Reaction + + +def parse(handle): + """Parse a KEGG pathway map.""" + for line in handle: + data, catalysts, reaction = line.split(":") + catalysts = [(catalysts,)] + reactants = {} + before, after = reaction.split("<=>") + compounds = before.split(" + ") + for compound in compounds: + compound = compound.strip() + try: + number, compound = compound.split() + number = -int(number) + except ValueError: + number = -1 + reactants[compound] = number + compounds = after.split(" + ") + for compound in compounds: + compound = compound.strip() + try: + number, compound = compound.split() + number = int(number) + except ValueError: + number = +1 + reactants[compound] = number + yield Reaction(reactants, catalysts, True, data) diff --git a/code/lib/Bio/KEGG/Map/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/KEGG/Map/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..c220247 Binary files /dev/null and b/code/lib/Bio/KEGG/Map/__pycache__/__init__.cpython-37.pyc differ diff --git a/code/lib/Bio/KEGG/REST.py b/code/lib/Bio/KEGG/REST.py new file mode 100644 index 0000000..11f9f98 --- /dev/null +++ b/code/lib/Bio/KEGG/REST.py @@ -0,0 +1,315 @@ +# Copyright 2014 by Kevin Wu. +# Revisions copyright 2014 by Peter Cock. +# All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. + +"""Provides code to access the REST-style KEGG online API. + +This module aims to make the KEGG online REST-style API easier to use. See: +http://www.kegg.jp/kegg/rest/keggapi.html + +The KEGG REST-style API provides simple access to a range of KEGG databases. +This works using simple URLs (which this module will construct for you), +with any errors indicated via HTTP error levels. + +The functionality is somewhat similar to Biopython's Bio.TogoWS and Bio.Entrez +modules. + +Currently KEGG does not provide any usage guidelines (unlike the NCBI whose +requirements are reasonably clear). To avoid risking overloading the service, +Biopython will only allow three calls per second. + +References: +Kanehisa, M. and Goto, S.; KEGG: Kyoto Encyclopedia of Genes and Genomes. +Nucleic Acids Res. 28, 29-34 (2000). + +""" + +import io +from urllib.request import urlopen + + +def _q(op, arg1, arg2=None, arg3=None): + URL = "http://rest.kegg.jp/%s" + if arg2 and arg3: + args = "%s/%s/%s/%s" % (op, arg1, arg2, arg3) + elif arg2: + args = "%s/%s/%s" % (op, arg1, arg2) + else: + args = "%s/%s" % (op, arg1) + resp = urlopen(URL % (args)) + + if "image" == arg2: + return resp + + handle = io.TextIOWrapper(resp, encoding="UTF-8") + handle.url = resp.url + return handle + + +# http://www.kegg.jp/kegg/rest/keggapi.html +def kegg_info(database): + """KEGG info - Displays the current statistics of a given database. + + db - database or organism (string) + + The argument db can be a KEGG database name (e.g. 'pathway' or its + official abbreviation, 'path'), or a KEGG organism code or T number + (e.g. 'hsa' or 'T01001' for human). + + A valid list of organism codes and their T numbers can be obtained + via kegg_info('organism') or http://rest.kegg.jp/list/organism + + """ + # TODO - return a string (rather than the handle?) + # TODO - chache and validate the organism code / T numbers? + # TODO - can we parse the somewhat formatted output? 
+ # + # http://rest.kegg.jp/info/ + # + # = pathway | brite | module | disease | drug | environ | + # ko | genome | | compound | glycan | reaction | + # rpair | rclass | enzyme | genomes | genes | ligand | kegg + # = KEGG organism code or T number + return _q("info", database) + + +def kegg_list(database, org=None): + """KEGG list - Entry list for database, or specified database entries. + + db - database or organism (string) + org - optional organism (string), see below. + + For the pathway and module databases the optional organism can be + used to restrict the results. + + """ + # TODO - split into two functions (dbentries seems separate)? + # + # http://rest.kegg.jp/list// + # + # = pathway | module + # = KEGG organism code + if database in ("pathway", "module") and org: + resp = _q("list", database, org) + elif isinstance(database, str) and database and org: + raise ValueError("Invalid database arg for kegg list request.") + + # http://rest.kegg.jp/list/ + # + # = pathway | brite | module | disease | drug | environ | + # ko | genome | | compound | glycan | reaction | + # rpair | rclass | enzyme | organism + # = KEGG organism code or T number + # + # + # http://rest.kegg.jp/list/ + # + # = KEGG database entries involving the following + # = pathway | brite | module | disease | drug | environ | + # ko | genome | | compound | glycan | reaction | + # rpair | rclass | enzyme + # = KEGG organism code or T number + else: + if isinstance(database, list): + if len(database) > 100: + raise ValueError( + "Maximum number of databases is 100 for kegg list query" + ) + database = ("+").join(database) + resp = _q("list", database) + + return resp + + +def kegg_find(database, query, option=None): + """KEGG find - Data search. + + Finds entries with matching query keywords or other query data in + a given database. + + db - database or organism (string) + query - search terms (string) + option - search option (string), see below. + + For the compound and drug database, set option to the string 'formula', + 'exact_mass' or 'mol_weight' to search on that field only. The + chemical formula search is a partial match irrespective of the order + of atoms given. The exact mass (or molecular weight) is checked by + rounding off to the same decimal place as the query data. A range of + values may also be specified with the minus(-) sign. + + """ + # TODO - return list of tuples? + # + # http://rest.kegg.jp/find///") + counter = 0 + + while True: + start_offset = handle.tell() + line = handle.readline() + if not line: + break + if qstart_mark not in line: + continue + # The following requirements are to make supporting BGZF compressed + # BLAST XML files simpler (avoids complex offset manipulations): + assert line.count(qstart_mark) == 1, "XML without line breaks?" + assert line.lstrip().startswith(qstart_mark), line + if qend_mark in line: + # Should cope with ... on one long line + block = line + else: + # Load the rest of this block up to and including + block = [line] + while line and qend_mark not in line: + line = handle.readline() + assert qstart_mark not in line, line + block.append(line) + assert line.rstrip().endswith(qend_mark), line + block = b"".join(block) + assert block.count(qstart_mark) == 1, "XML without line breaks? %r" % block + assert block.count(qend_mark) == 1, "XML without line breaks? %r" % block + # Now we have a full ... 
block, find the ID
+            regx = re.search(re_desc, block)
+            try:
+                qstart_desc = regx.group(2)
+                qstart_id = regx.group(1)
+            except AttributeError:
+                # use the fallback values
+                assert re.search(re_desc_end, block)
+                qstart_desc = self._fallback["description"].encode()
+                qstart_id = self._fallback["id"].encode()
+            if qstart_id.startswith(blast_id_mark):
+                qstart_id = qstart_desc.split(b" ", 1)[0]
+            yield qstart_id.decode(), start_offset, len(block)
+            counter += 1
+
+    def _parse(self, handle):
+        """Overwrite SearchIndexer parse (PRIVATE).
+
+        As we need to set the meta and fallback dictionaries to the parser.
+        """
+        generator = self._parser(handle, **self._kwargs)
+        generator._meta = self._meta
+        generator._fallback = self._fallback
+        return next(iter(generator))
+
+    def get_raw(self, offset):
+        """Return the raw record from the file as a bytes string."""
+        qend_mark = self.qend_mark
+        handle = self._handle
+        handle.seek(offset)
+
+        qresult_raw = handle.readline()
+        assert qresult_raw.lstrip().startswith(self.qstart_mark)
+        while qend_mark not in qresult_raw:
+            qresult_raw += handle.readline()
+        assert qresult_raw.rstrip().endswith(qend_mark)
+        assert qresult_raw.count(qend_mark) == 1
+        # Note this will include any leading and trailing whitespace, in
+        # general expecting "<Iteration>\n...\n</Iteration>\n"
+        return qresult_raw
+
+
+class _BlastXmlGenerator(XMLGenerator):
+    """Event-based XML Generator."""
+
+    def __init__(self, out, encoding="utf-8", indent=" ", increment=2):
+        """Initialize the class."""
+        XMLGenerator.__init__(self, out, encoding)
+        # the indentation character
+        self._indent = indent
+        # nest level
+        self._level = 0
+        # how many indentation character should we increment per level
+        self._increment = increment
+        # container for names of tags with children
+        self._parent_stack = []
+        # determine writer method
+
+    def startDocument(self):
+        """Start the XML document."""
+        self._write(
+            '<?xml version="1.0"?>\n'
+            '<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" '
+            '"http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">\n'
+        )
+
+    def startElement(self, name, attrs=None, children=False):
+        """Start an XML element.
+
+        :param name: element name
+        :type name: string
+        :param attrs: element attributes
+        :type attrs: dictionary {string: object}
+        :param children: whether the element has children or not
+        :type children: bool
+
+        """
+        if attrs is None:
+            attrs = {}
+        self.ignorableWhitespace(self._indent * self._level)
+        XMLGenerator.startElement(self, name, attrs)
+
+    def endElement(self, name):
+        """End an XML element of the given name."""
+        XMLGenerator.endElement(self, name)
+        self._write("\n")
+
+    def startParent(self, name, attrs=None):
+        """Start an XML element which has children.
+
+        :param name: element name
+        :type name: string
+        :param attrs: element attributes
+        :type attrs: dictionary {string: object}
+
+        """
+        if attrs is None:
+            attrs = {}
+        self.startElement(name, attrs, children=True)
+        self._level += self._increment
+        self._write("\n")
+        # append the element name, so we can end it later
+        self._parent_stack.append(name)
+
+    def endParent(self):
+        """End an XML element with children."""
+        # the element to end is the one on top of the stack
+        name = self._parent_stack.pop()
+        self._level -= self._increment
+        self.ignorableWhitespace(self._indent * self._level)
+        self.endElement(name)
+
+    def startParents(self, *names):
+        """Start XML elements without children."""
+        for name in names:
+            self.startParent(name)
+
+    def endParents(self, num):
+        """End XML elements, according to the given number."""
+        for i in range(num):
+            self.endParent()
+
+    def simpleElement(self, name, content=None):
+        """Create an XML element without children with the given content."""
+        self.startElement(name, attrs={})
+        if content:
+            self.characters(content)
+        self.endElement(name)
+
+    def characters(self, content):
+        """Replace quotes and apostrophe."""
+        content = escape(str(content))
+        for a, b in (('"', "&quot;"), ("'", "&apos;")):
+            content = content.replace(a, b)
+        self._write(content)
+
+
+class BlastXmlWriter:
+    """Stream-based BLAST+ XML Writer."""
+
+    def __init__(self, handle, use_raw_query_ids=True, use_raw_hit_ids=True):
+        """Initialize the class."""
+        self.xml = _BlastXmlGenerator(handle, "utf-8")
+        self._use_raw_query_ids = use_raw_query_ids
+        self._use_raw_hit_ids = use_raw_hit_ids
+
+    def write_file(self, qresults):
+        """Write the XML contents to the output handle."""
+        xml = self.xml
+        self.qresult_counter, self.hit_counter, self.hsp_counter, self.frag_counter = (
+            0,
+            0,
+            0,
+            0,
+        )
+
+        # get the first qresult, since the preamble requires its attr values
+        first_qresult = next(qresults)
+        # start the XML document, set the root element, and create the preamble
+        xml.startDocument()
+        xml.startParent("BlastOutput")
+        self._write_preamble(first_qresult)
+        # and write the qresults
+        xml.startParent("BlastOutput_iterations")
+        self._write_qresults(chain([first_qresult], qresults))
+        xml.endParents(2)
+        xml.endDocument()
+
+        return (
+            self.qresult_counter,
+            self.hit_counter,
+            self.hsp_counter,
+            self.frag_counter,
+        )
+
+    def _write_elem_block(self, block_name, map_name, obj, opt_dict=None):
+        """Write sibling XML elements (PRIVATE).
+ + :param block_name: common element name prefix + :type block_name: string + :param map_name: name of mapping between element and attribute names + :type map_name: string + :param obj: object whose attribute value will be used + :type obj: object + :param opt_dict: custom element-attribute mapping + :type opt_dict: dictionary {string: string} + + """ + if opt_dict is None: + opt_dict = {} + for elem, attr in _WRITE_MAPS[map_name]: + elem = block_name + elem + try: + content = str(getattr(obj, attr)) + except AttributeError: + # ensure attrs that is not present is optional + if elem not in _DTD_OPT: + raise ValueError( + "Element %r (attribute %r) not found" % (elem, attr) + ) + else: + # custom element-attribute mapping, for fallback values + if elem in opt_dict: + content = opt_dict[elem] + self.xml.simpleElement(elem, content) + + def _write_preamble(self, qresult): + """Write the XML file preamble (PRIVATE).""" + xml = self.xml + + for elem, attr in _WRITE_MAPS["preamble"]: + elem = "BlastOutput_" + elem + if elem == "BlastOutput_param": + xml.startParent(elem) + self._write_param(qresult) + xml.endParent() + continue + try: + content = str(getattr(qresult, attr)) + except AttributeError: + if elem not in _DTD_OPT: + raise ValueError( + "Element %s (attribute %s) not found" % (elem, attr) + ) + else: + if elem == "BlastOutput_version": + content = "%s %s" % (qresult.program.upper(), qresult.version) + elif qresult.blast_id: + if elem == "BlastOutput_query-ID": + content = qresult.blast_id + elif elem == "BlastOutput_query-def": + content = " ".join([qresult.id, qresult.description]).strip() + xml.simpleElement(elem, content) + + def _write_param(self, qresult): + """Write the parameter block of the preamble (PRIVATE).""" + xml = self.xml + xml.startParent("Parameters") + self._write_elem_block("Parameters_", "param", qresult) + xml.endParent() + + def _write_qresults(self, qresults): + """Write QueryResult objects into iteration elements (PRIVATE).""" + xml = self.xml + + for num, qresult in enumerate(qresults): + xml.startParent("Iteration") + xml.simpleElement("Iteration_iter-num", str(num + 1)) + opt_dict = {} + if self._use_raw_query_ids: + query_id = qresult.blast_id + query_desc = qresult.id + " " + qresult.description + else: + query_id = qresult.id + query_desc = qresult.description + + opt_dict = { + "Iteration_query-ID": query_id, + "Iteration_query-def": query_desc, + } + self._write_elem_block("Iteration_", "qresult", qresult, opt_dict) + # the Iteration_hits tag only has children if there are hits + if qresult: + xml.startParent("Iteration_hits") + self._write_hits(qresult.hits) + xml.endParent() + # otherwise it's a simple element without any contents + else: + xml.simpleElement("Iteration_hits", "") + + xml.startParents("Iteration_stat", "Statistics") + self._write_elem_block("Statistics_", "stat", qresult) + xml.endParents(2) + # there's a message if no hits is present + if not qresult: + xml.simpleElement("Iteration_message", "No hits found") + self.qresult_counter += 1 + xml.endParent() + + def _write_hits(self, hits): + """Write Hit objects (PRIVATE).""" + xml = self.xml + + for num, hit in enumerate(hits): + xml.startParent("Hit") + xml.simpleElement("Hit_num", str(num + 1)) + # use custom hit_id and hit_def mapping if the hit has a + # BLAST-generated ID + opt_dict = {} + + if self._use_raw_hit_ids: + hit_id = hit.blast_id + hit_desc = " >".join( + [f"{x} {y}" for x, y in zip(hit.id_all, hit.description_all)] + ) + else: + hit_id = hit.id + hit_desc = 
hit.description + " >".join( + [ + f"{x} {y}" + for x, y in zip(hit.id_all[1:], hit.description_all[1:]) + ] + ) + + opt_dict = {"Hit_id": hit_id, "Hit_def": hit_desc} + self._write_elem_block("Hit_", "hit", hit, opt_dict) + xml.startParent("Hit_hsps") + self._write_hsps(hit.hsps) + self.hit_counter += 1 + xml.endParents(2) + + def _write_hsps(self, hsps): + """Write HSP objects (PRIVATE).""" + xml = self.xml + for num, hsp in enumerate(hsps): + xml.startParent("Hsp") + xml.simpleElement("Hsp_num", str(num + 1)) + for elem, attr in _WRITE_MAPS["hsp"]: + elem = "Hsp_" + elem + try: + content = self._adjust_output(hsp, elem, attr) + # make sure any elements that is not present is optional + # in the DTD + except AttributeError: + if elem not in _DTD_OPT: + raise ValueError( + "Element %s (attribute %s) not found" % (elem, attr) + ) + else: + xml.simpleElement(elem, str(content)) + self.hsp_counter += 1 + self.frag_counter += len(hsp.fragments) + xml.endParent() + + def _adjust_output(self, hsp, elem, attr): + """Adjust output to mimic native BLAST+ XML as much as possible (PRIVATE).""" + # adjust coordinates + if attr in ( + "query_start", + "query_end", + "hit_start", + "hit_end", + "pattern_start", + "pattern_end", + ): + content = getattr(hsp, attr) + 1 + if "_start" in attr: + content = getattr(hsp, attr) + 1 + else: + content = getattr(hsp, attr) + + # adjust for 'from' <--> 'to' flip if it's not a translated search + # and frames are different + # adapted from /src/algo/blast/format/blastxml_format.cpp#L216 + if hsp.query_frame != 0 and hsp.hit_frame < 0: + if attr == "hit_start": + content = getattr(hsp, "hit_end") + elif attr == "hit_end": + content = getattr(hsp, "hit_start") + 1 + + # for seqrecord objects, we only need the sequence string + elif elem in ("Hsp_hseq", "Hsp_qseq"): + content = str(getattr(hsp, attr).seq) + elif elem == "Hsp_midline": + content = hsp.aln_annotation["similarity"] + elif elem in ("Hsp_evalue", "Hsp_bit-score"): + # adapted from src/algo/blast/format/blastxml_format.cpp#L138-140 + content = "%.*g" % (6, getattr(hsp, attr)) + else: + content = getattr(hsp, attr) + + return content + + +# if not used as a module, run the doctest +if __name__ == "__main__": + from Bio._utils import run_doctest + + run_doctest() diff --git a/code/lib/Bio/SearchIO/BlatIO.py b/code/lib/Bio/SearchIO/BlatIO.py new file mode 100644 index 0000000..fde64dd --- /dev/null +++ b/code/lib/Bio/SearchIO/BlatIO.py @@ -0,0 +1,751 @@ +# Copyright 2012 by Wibowo Arindrarto. All rights reserved. +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +"""Bio.SearchIO parser for BLAT output formats. + +This module adds support for parsing BLAT outputs. BLAT (BLAST-Like Alignment +Tool) is a sequence similarity search program initially built for annotating +the human genome. + +Bio.SearchIO.BlastIO was tested using standalone BLAT version 34, psLayout +version 3. It should be able to parse psLayout version 4 without problems. 
+ +More information on BLAT is available from these sites: + + - Publication: http://genome.cshlp.org/content/12/4/656 + - User guide: http://genome.ucsc.edu/goldenPath/help/blatSpec.html + - Source download: http://www.soe.ucsc.edu/~kent/src + - Executable download: http://hgdownload.cse.ucsc.edu/admin/exe/ + - Blat score calculation: http://genome.ucsc.edu/FAQ/FAQblat.html#blat4 + + +Supported Formats +================= + +BlatIO supports parsing, indexing, and writing for both PSL and PSLX output +formats, with or without header. To parse, index, or write PSLX files, use the +'pslx' keyword argument and set it to True. + + # blat-psl defaults to PSL files + >>> from Bio import SearchIO + >>> psl = 'Blat/psl_34_004.psl' + >>> qresult = SearchIO.read(psl, 'blat-psl') + >>> qresult + QueryResult(id='hg19_dna', 10 hits) + + # set the pslx flag to parse PSLX files + >>> pslx = 'Blat/pslx_34_004.pslx' + >>> qresult = SearchIO.read(pslx, 'blat-psl', pslx=True) + >>> qresult + QueryResult(id='hg19_dna', 10 hits) + +For parsing and indexing, you do not need to specify whether the file has a +header or not. For writing, if you want to write a header, you can set the +'header' keyword argument to True. This will write a 'psLayout version 3' header +to your output file. + + from Bio import SearchIO + qresult = SearchIO.read(psl, 'blat-psl') + SearchIO.write(qresult, 'header.psl', header=True) + (1, 10, 19, 23) + +Note that the number of HSPFragments written may exceed the number of HSP +objects. This is because in PSL files, it is possible to have single matches +consisting of noncontiguous sequence fragments. This is where the HSPFragment +object comes into play. These fragments are grouped into a single HSP because +they share the same statistics (e.g. match numbers, BLAT score, etc.). However, +they do not share the same sequence attributes, such as the start and end +coordinates, making them distinct objects. + +In addition to parsing PSL(X) files, BlatIO also computes the percent identities +and scores of your search results. This is done using the calculation formula +posted here: http://genome.ucsc.edu/FAQ/FAQblat.html#blat4. It mimics the score +and percent identity calculation done by UCSC's web BLAT service. + +Since BlatIO parses the file in a single pass, it expects all results from +the same query to be in consecutive rows. If the results from one query are +spread in nonconsecutive rows, BlatIO will consider them to be separate +QueryResult objects. + +In most cases, the PSL(X) format uses the same coordinate system as Python +(zero-based, half open). These coordinates are anchored on the plus strand. +However, if the query aligns on the minus strand, BLAT will anchor the qStarts +coordinates on the minus strand instead. BlatIO is aware of this, and will +re-anchor the qStarts coordinates to the plus strand whenever it sees a minus +strand query match. Conversely, when you write out to a PSL(X) file, BlatIO will +reanchor qStarts to the minus strand again. 
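+
+As a sketch of the arithmetic involved (implemented by the private helper
+``_reorient_starts`` further below), a minus-strand block start is re-anchored
+to the plus strand with:
+
+    plus_start = seqlen - minus_start - blksize   # e.g. 100 - 30 - 10 = 60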
+
+BlatIO provides the following attribute-column mapping:
+
++----------------+-------------------------+-----------------------------------+
+| Object         | Attribute               | Column Name, Value                |
++================+=========================+===================================+
+| QueryResult    | id                      | Q name, query sequence ID         |
+|                +-------------------------+-----------------------------------+
+|                | seq_len                 | Q size, query sequence full       |
+|                |                         | length                            |
++----------------+-------------------------+-----------------------------------+
+| Hit            | id                      | T name, hit sequence ID           |
+|                +-------------------------+-----------------------------------+
+|                | seq_len                 | T size, hit sequence full length  |
++----------------+-------------------------+-----------------------------------+
+| HSP            | hit_end                 | T end, end coordinate of the last |
+|                |                         | hit fragment                      |
+|                +-------------------------+-----------------------------------+
+|                | hit_gap_num             | T gap bases, number of bases      |
+|                |                         | inserted in hit                   |
+|                +-------------------------+-----------------------------------+
+|                | hit_gapopen_num         | T gap count, number of hit gap    |
+|                |                         | inserts                           |
+|                +-------------------------+-----------------------------------+
+|                | hit_span_all            | blockSizes, sizes of each         |
+|                |                         | fragment                          |
+|                +-------------------------+-----------------------------------+
+|                | hit_start               | T start, start coordinate of the  |
+|                |                         | first hit fragment                |
+|                +-------------------------+-----------------------------------+
+|                | hit_start_all           | tStarts, start coordinate of each |
+|                |                         | hit fragment                      |
+|                +-------------------------+-----------------------------------+
+|                | match_num               | match, number of non-repeat       |
+|                |                         | matches                           |
+|                +-------------------------+-----------------------------------+
+|                | mismatch_num            | mismatch, number of mismatches    |
+|                +-------------------------+-----------------------------------+
+|                | match_rep_num           | rep. match, number of matches     |
+|                |                         | that are part of repeats          |
+|                +-------------------------+-----------------------------------+
+|                | n_num                   | N's, number of N bases            |
+|                +-------------------------+-----------------------------------+
+|                | query_end               | Q end, end coordinate of the last |
+|                |                         | query fragment                    |
+|                +-------------------------+-----------------------------------+
+|                | query_gap_num           | Q gap bases, number of bases      |
+|                |                         | inserted in query                 |
+|                +-------------------------+-----------------------------------+
+|                | query_gapopen_num       | Q gap count, number of query gap  |
+|                |                         | inserts                           |
+|                +-------------------------+-----------------------------------+
+|                | query_span_all          | blockSizes, sizes of each         |
+|                |                         | fragment                          |
+|                +-------------------------+-----------------------------------+
+|                | query_start             | Q start, start coordinate of the  |
+|                |                         | first query block                 |
+|                +-------------------------+-----------------------------------+
+|                | query_start_all         | qStarts, start coordinate of each |
+|                |                         | query fragment                    |
+|                +-------------------------+-----------------------------------+
+|                | len [*]_                | block count, the number of blocks |
+|                |                         | in the alignment                  |
++----------------+-------------------------+-----------------------------------+
+| HSPFragment    | hit                     | hit sequence, if present          |
+|                +-------------------------+-----------------------------------+
+|                | hit_strand              | strand, hit sequence strand       |
+|                +-------------------------+-----------------------------------+
+|                | query                   | query sequence, if present        |
+|                +-------------------------+-----------------------------------+
+|                | query_strand            | strand, query sequence strand     |
++----------------+-------------------------+-----------------------------------+
+
+In addition to the column mappings above, BlatIO also provides the following
+object attributes:
+
++----------------+-------------------------+-----------------------------------+
+| Object         | Attribute               | Value                             |
++================+=========================+===================================+
+| HSP            | gapopen_num             | Q gap count + T gap count, total  |
+|                |                         | number of gap openings            |
+|                +-------------------------+-----------------------------------+
+|                | ident_num               | matches + repmatches, total       |
+|                |                         | number of identical residues      |
+|                +-------------------------+-----------------------------------+
+|                | ident_pct               | percent identity, calculated      |
+|                |                         | using UCSC's formula              |
+|                +-------------------------+-----------------------------------+
+|                | query_is_protein        | boolean, whether the query        |
+|                |                         | sequence is a protein             |
+|                +-------------------------+-----------------------------------+
+|                | score                   | HSP score, calculated using       |
+|                |                         | UCSC's formula                    |
++----------------+-------------------------+-----------------------------------+
+
+Finally, the default HSP and HSPFragment properties are also provided. See the
+HSP and HSPFragment documentation for more details on these properties.
+
+
+..
[*] You can obtain the number of blocks / fragments in the HSP by invoking + ``len`` on the HSP + +""" +import re +from math import log + +from Bio.SearchIO._index import SearchIndexer +from Bio.SearchIO._model import QueryResult, Hit, HSP, HSPFragment + + +__all__ = ("BlatPslParser", "BlatPslIndexer", "BlatPslWriter") + + +# precompile regex patterns +_PTR_ROW_CHECK = r"^\d+\s+\d+\s+\d+\s+\d+" +_RE_ROW_CHECK = re.compile(_PTR_ROW_CHECK) +_RE_ROW_CHECK_IDX = re.compile(_PTR_ROW_CHECK.encode()) + + +def _list_from_csv(csv_string, caster=None): + """Transform the given comma-separated string into a list (PRIVATE). + + :param csv_string: comma-separated input string + :type csv_string: string + :param caster: function used to cast each item in the input string + to its intended type + :type caster: callable, accepts string, returns object + + """ + if caster is None: + return [x for x in csv_string.split(",") if x] + else: + return [caster(x) for x in csv_string.split(",") if x] + + +def _reorient_starts(starts, blksizes, seqlen, strand): + """Reorients block starts into the opposite strand's coordinates (PRIVATE). + + :param starts: start coordinates + :type starts: list [int] + :param blksizes: block sizes + :type blksizes: list [int] + :param seqlen: sequence length + :type seqlen: int + :param strand: sequence strand + :type strand: int, choice of -1, 0, or 1 + + """ + if len(starts) != len(blksizes): + raise RuntimeError( + "Unequal start coordinates and block sizes list (%r vs %r)" + % (len(starts), len(blksizes)) + ) + # see: http://genome.ucsc.edu/goldenPath/help/blatSpec.html + # no need to reorient if it's already the positive strand + if strand >= 0: + return starts + else: + # the plus-oriented coordinate is calculated by this: + # plus_coord = length - minus_coord - block_size + return [seqlen - start - blksize for start, blksize in zip(starts, blksizes)] + + +def _is_protein(psl): + """Validate if psl is protein (PRIVATE).""" + # check if query is protein or not + # adapted from http://genome.ucsc.edu/FAQ/FAQblat.html#blat4 + if len(psl["strand"]) == 2: + if psl["strand"][1] == "+": + return psl["tend"] == psl["tstarts"][-1] + 3 * psl["blocksizes"][-1] + elif psl["strand"][1] == "-": + return psl["tstart"] == psl["tsize"] - ( + psl["tstarts"][-1] + 3 * psl["blocksizes"][-1] + ) + + return False + + +def _calc_millibad(psl, is_protein): + """Calculate millibad (PRIVATE).""" + # adapted from http://genome.ucsc.edu/FAQ/FAQblat.html#blat4 + size_mul = 3 if is_protein else 1 + millibad = 0 + + qali_size = size_mul * (psl["qend"] - psl["qstart"]) + tali_size = psl["tend"] - psl["tstart"] + ali_size = min(qali_size, tali_size) + if ali_size <= 0: + return 0 + + size_dif = qali_size - tali_size + size_dif = 0 if size_dif < 0 else size_dif + + total = size_mul * (psl["matches"] + psl["repmatches"] + psl["mismatches"]) + if total != 0: + millibad = ( + 1000 + * ( + psl["mismatches"] * size_mul + + psl["qnuminsert"] + + round(3 * log(1 + size_dif)) + ) + ) / total + + return millibad + + +def _calc_score(psl, is_protein): + """Calculate score (PRIVATE).""" + # adapted from http://genome.ucsc.edu/FAQ/FAQblat.html#blat4 + size_mul = 3 if is_protein else 1 + return ( + size_mul * (psl["matches"] + (psl["repmatches"] >> 1)) + - size_mul * psl["mismatches"] + - psl["qnuminsert"] + - psl["tnuminsert"] + ) + + +def _create_hsp(hid, qid, psl): + """Create high scoring pair object (PRIVATE).""" + # protein flag + is_protein = _is_protein(psl) + # strand + # if query is protein, strand is 0 + if 
is_protein: + qstrand = 0 + else: + qstrand = 1 if psl["strand"][0] == "+" else -1 + # try to get hit strand, if it exists + try: + hstrand = 1 if psl["strand"][1] == "+" else -1 + except IndexError: + hstrand = 1 # hit strand defaults to plus + + blocksize_multiplier = 3 if is_protein else 1 + # query block starts + qstarts = _reorient_starts(psl["qstarts"], psl["blocksizes"], psl["qsize"], qstrand) + # hit block starts + if len(psl["strand"]) == 2: + hstarts = _reorient_starts( + psl["tstarts"], + [blocksize_multiplier * i for i in psl["blocksizes"]], + psl["tsize"], + hstrand, + ) + else: + hstarts = psl["tstarts"] + # set query and hit coords + # this assumes each block has no gaps (which seems to be the case) + assert len(qstarts) == len(hstarts) == len(psl["blocksizes"]) + query_range_all = list( + zip(qstarts, [x + y for x, y in zip(qstarts, psl["blocksizes"])]) + ) + hit_range_all = list( + zip( + hstarts, + [x + y * blocksize_multiplier for x, y in zip(hstarts, psl["blocksizes"])], + ) + ) + # check length of sequences and coordinates, all must match + if "tseqs" in psl and "qseqs" in psl: + assert ( + len(psl["tseqs"]) + == len(psl["qseqs"]) + == len(query_range_all) + == len(hit_range_all) + ) + else: + assert len(query_range_all) == len(hit_range_all) + + frags = [] + # iterating over query_range_all, but hit_range_all works just as well + for idx, qcoords in enumerate(query_range_all): + hseqlist = psl.get("tseqs") + hseq = "" if not hseqlist else hseqlist[idx] + qseqlist = psl.get("qseqs") + qseq = "" if not qseqlist else qseqlist[idx] + frag = HSPFragment(hid, qid, hit=hseq, query=qseq) + # set molecule type + frag.molecule_type = "DNA" + # set coordinates + frag.query_start = qcoords[0] + frag.query_end = qcoords[1] + frag.hit_start = hit_range_all[idx][0] + frag.hit_end = hit_range_all[idx][1] + # and strands + frag.query_strand = qstrand + frag.hit_strand = hstrand + frags.append(frag) + + # create hsp object + hsp = HSP(frags) + # check if start and end are set correctly + assert hsp.query_start == psl["qstart"] + assert hsp.query_end == psl["qend"] + assert hsp.hit_start == psl["tstart"] + assert hsp.hit_end == psl["tend"] + # and check block spans as well + hit_spans = [span / blocksize_multiplier for span in hsp.hit_span_all] + assert hit_spans == hsp.query_span_all == psl["blocksizes"] + # set its attributes + hsp.match_num = psl["matches"] + hsp.mismatch_num = psl["mismatches"] + hsp.match_rep_num = psl["repmatches"] + hsp.n_num = psl["ncount"] + hsp.query_gapopen_num = psl["qnuminsert"] + hsp.query_gap_num = psl["qbaseinsert"] + hsp.hit_gapopen_num = psl["tnuminsert"] + hsp.hit_gap_num = psl["tbaseinsert"] + + hsp.ident_num = psl["matches"] + psl["repmatches"] + hsp.gapopen_num = psl["qnuminsert"] + psl["tnuminsert"] + hsp.gap_num = psl["qbaseinsert"] + psl["tbaseinsert"] + hsp.query_is_protein = is_protein + hsp.ident_pct = 100.0 - _calc_millibad(psl, is_protein) * 0.1 + hsp.score = _calc_score(psl, is_protein) + # helper flag, for writing + hsp._has_hit_strand = len(psl["strand"]) == 2 + + return hsp + + +class BlatPslParser: + """Parser for the BLAT PSL format.""" + + def __init__(self, handle, pslx=False): + """Initialize the class.""" + self.handle = handle + self.line = self.handle.readline() + self.pslx = pslx + + def __iter__(self): + """Iterate over BlatPslParser, yields query results.""" + # break out if it's an empty file + if not self.line: + return + + # read through header + # this assumes that the result row match the regex + while not 
re.search(_RE_ROW_CHECK, self.line.strip()): + self.line = self.handle.readline() + if not self.line: + return + + # parse into query results + for qresult in self._parse_qresult(): + qresult.program = "blat" + yield qresult + + def _parse_row(self): + """Return a dictionary of parsed column values (PRIVATE).""" + assert self.line + cols = [x for x in self.line.strip().split("\t") if x] + self._validate_cols(cols) + + psl = {} + psl["qname"] = cols[9] # qName + psl["qsize"] = int(cols[10]) # qSize + psl["tname"] = cols[13] # tName + psl["tsize"] = int(cols[14]) # tSize + psl["matches"] = int(cols[0]) # matches + psl["mismatches"] = int(cols[1]) # misMatches + psl["repmatches"] = int(cols[2]) # repMatches + psl["ncount"] = int(cols[3]) # nCount + psl["qnuminsert"] = int(cols[4]) # qNumInsert + psl["qbaseinsert"] = int(cols[5]) # qBaseInsert + psl["tnuminsert"] = int(cols[6]) # tNumInsert + psl["tbaseinsert"] = int(cols[7]) # tBaseInsert + psl["strand"] = cols[8] # strand + psl["qstart"] = int(cols[11]) # qStart + psl["qend"] = int(cols[12]) # qEnd + psl["tstart"] = int(cols[15]) # tStart + psl["tend"] = int(cols[16]) # tEnd + psl["blockcount"] = int(cols[17]) # blockCount + psl["blocksizes"] = _list_from_csv(cols[18], int) # blockSizes + psl["qstarts"] = _list_from_csv(cols[19], int) # qStarts + psl["tstarts"] = _list_from_csv(cols[20], int) # tStarts + if self.pslx: + psl["qseqs"] = _list_from_csv(cols[21]) # query sequence + psl["tseqs"] = _list_from_csv(cols[22]) # hit sequence + + return psl + + def _validate_cols(self, cols): + """Validate column's length of PSL or PSLX (PRIVATE).""" + if not self.pslx: + if len(cols) != 21: + raise ValueError( + "Invalid PSL line: %r. Expected 21 tab-separated columns, found %i" + % (self.line, len(cols)) + ) + else: + if len(cols) != 23: + raise ValueError( + "Invalid PSLX line: %r. 
Expected 23 tab-separated columns, found %i" + % (self.line, len(cols)) + ) + + def _parse_qresult(self): + """Yield QueryResult objects (PRIVATE).""" + # state values, determines what to do for each line + state_EOF = 0 + state_QRES_NEW = 1 + state_QRES_SAME = 3 + state_HIT_NEW = 2 + state_HIT_SAME = 4 + # initial dummy values + qres_state = None + file_state = None + cur_qid, cur_hid = None, None + prev_qid, prev_hid = None, None + cur, prev = None, None + hit_list, hsp_list = [], [] + + while True: + # store previous line's parsed values for all lines after the first + if cur is not None: + prev = cur + prev_qid = cur_qid + prev_hid = cur_hid + # only parse the result row if it's not EOF + if self.line: + cur = self._parse_row() + cur_qid = cur["qname"] + cur_hid = cur["tname"] + else: + file_state = state_EOF + # mock values, since we have nothing to parse + cur_qid, cur_hid = None, None + + # get the state of hit and qresult + if prev_qid != cur_qid: + qres_state = state_QRES_NEW + else: + qres_state = state_QRES_SAME + # new hits are hits with different ids or hits in a new qresult + if prev_hid != cur_hid or qres_state == state_QRES_NEW: + hit_state = state_HIT_NEW + else: + hit_state = state_HIT_SAME + + if prev is not None: + # create fragment and HSP and set their attributes + hsp = _create_hsp(prev_hid, prev_qid, prev) + hsp_list.append(hsp) + + if hit_state == state_HIT_NEW: + # create Hit and set its attributes + hit = Hit(hsp_list) + hit.seq_len = prev["tsize"] + hit_list.append(hit) + hsp_list = [] + + # create qresult and yield if we're at a new qresult or at EOF + if qres_state == state_QRES_NEW or file_state == state_EOF: + qresult = QueryResult(id=prev_qid) + for hit in hit_list: + qresult.absorb(hit) + qresult.seq_len = prev["qsize"] + yield qresult + # if we're at EOF, break + if file_state == state_EOF: + break + hit_list = [] + + self.line = self.handle.readline() + + +class BlatPslIndexer(SearchIndexer): + """Indexer class for BLAT PSL output.""" + + _parser = BlatPslParser + + def __init__(self, filename, pslx=False): + """Initialize the class.""" + SearchIndexer.__init__(self, filename, pslx=pslx) + + def __iter__(self): + """Iterate over the file handle; yields key, start offset, and length.""" + handle = self._handle + handle.seek(0) + # denotes column location for query identifier + query_id_idx = 9 + qresult_key = None + tab_char = b"\t" + + start_offset = handle.tell() + line = handle.readline() + # read through header + # this assumes that the result row match the regex + while not re.search(_RE_ROW_CHECK_IDX, line.strip()): + start_offset = handle.tell() + line = handle.readline() + if not line: + return + + # and index the qresults + while True: + end_offset = handle.tell() + + cols = [x for x in line.strip().split(tab_char) if x] + if qresult_key is None: + qresult_key = cols[query_id_idx] + else: + curr_key = cols[query_id_idx] + + if curr_key != qresult_key: + yield qresult_key.decode(), start_offset, end_offset - start_offset + qresult_key = curr_key + start_offset = end_offset - len(line) + + line = handle.readline() + if not line: + yield qresult_key.decode(), start_offset, end_offset - start_offset + break + + def get_raw(self, offset): + """Return raw bytes string of a QueryResult object from the given offset.""" + handle = self._handle + handle.seek(offset) + query_id_idx = 9 + qresult_key = None + qresult_raw = b"" + tab_char = b"\t" + + while True: + line = handle.readline() + if not line: + break + cols = [x for x in 
line.strip().split(tab_char) if x] + if qresult_key is None: + qresult_key = cols[query_id_idx] + else: + curr_key = cols[query_id_idx] + if curr_key != qresult_key: + break + qresult_raw += line + + return qresult_raw + + +class BlatPslWriter: + """Writer for the blat-psl format.""" + + def __init__(self, handle, header=False, pslx=False): + """Initialize the class.""" + self.handle = handle + # flag for writing header or not + self.header = header + self.pslx = pslx + + def write_file(self, qresults): + """Write query results to file.""" + handle = self.handle + qresult_counter, hit_counter, hsp_counter, frag_counter = 0, 0, 0, 0 + + if self.header: + handle.write(self._build_header()) + + for qresult in qresults: + if qresult: + handle.write(self._build_row(qresult)) + qresult_counter += 1 + hit_counter += len(qresult) + hsp_counter += sum(len(hit) for hit in qresult) + frag_counter += sum(len(hit.fragments) for hit in qresult) + + return qresult_counter, hit_counter, hsp_counter, frag_counter + + def _build_header(self): + """Build header, tab-separated string (PRIVATE).""" + # for now, always use the psLayout version 3 + header = "psLayout version 3\n" + + # adapted from BLAT's source: lib/psl.c#L496 + header += ( + "\nmatch\tmis- \trep. \tN's\tQ gap\tQ gap\tT gap\tT " + "gap\tstrand\tQ \tQ \tQ \tQ \tT \tT " + "\tT \tT \tblock\tblockSizes \tqStarts\t tStarts" + "\n \tmatch\tmatch\t \tcount\tbases\tcount\tbases" + "\t \tname \tsize\tstart\tend\tname \tsize" + "\tstart\tend\tcount\n%s\n" % ("-" * 159) + ) + + return header + + def _build_row(self, qresult): + """Return a string or one row or more of the QueryResult object (PRIVATE).""" + # For now, our writer writes the row according to the order in + # the QueryResult and Hit objects. + # This is different from BLAT's native output, where the rows are + # grouped by strand. + # Should we tweak the behavior to better mimic the native output? 
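+        # The columns below follow the 21-column PSL order that _parse_row
+        # reads back in: matches, misMatches, repMatches, nCount, qNumInsert,
+        # qBaseInsert, tNumInsert, tBaseInsert, strand, qName, qSize, qStart,
+        # qEnd, tName, tSize, tStart, tEnd, blockCount, blockSizes, qStarts,
+        # tStarts (plus the two sequence columns when writing PSLX).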
+        qresult_lines = []
+
+        for hit in qresult:
+            for hsp in hit.hsps:
+
+                query_is_protein = getattr(hsp, "query_is_protein", False)
+                blocksize_multiplier = 3 if query_is_protein else 1
+
+                line = []
+                line.append(hsp.match_num)
+                line.append(hsp.mismatch_num)
+                line.append(hsp.match_rep_num)
+                line.append(hsp.n_num)
+                line.append(hsp.query_gapopen_num)
+                line.append(hsp.query_gap_num)
+                line.append(hsp.hit_gapopen_num)
+                line.append(hsp.hit_gap_num)
+
+                # check spans
+                eff_query_spans = [blocksize_multiplier * s for s in hsp.query_span_all]
+                if hsp.hit_span_all != eff_query_spans:
+                    raise ValueError("HSP hit span and query span values do not match.")
+                block_sizes = hsp.query_span_all
+
+                # set strand and starts
+                if hsp[0].query_strand >= 0:  # since it may be a protein seq
+                    strand = "+"
+                else:
+                    strand = "-"
+                qstarts = _reorient_starts(
+                    [x[0] for x in hsp.query_range_all],
+                    hsp.query_span_all,
+                    qresult.seq_len,
+                    hsp[0].query_strand,
+                )
+
+                if hsp[0].hit_strand == 1:
+                    hstrand = 1
+                    # only write hit strand if it was present in the source file
+                    if hsp._has_hit_strand:
+                        strand += "+"
+                else:
+                    hstrand = -1
+                    strand += "-"
+                hstarts = _reorient_starts(
+                    [x[0] for x in hsp.hit_range_all],
+                    hsp.hit_span_all,
+                    hit.seq_len,
+                    hstrand,
+                )
+
+                line.append(strand)
+                line.append(qresult.id)
+                line.append(qresult.seq_len)
+                line.append(hsp.query_start)
+                line.append(hsp.query_end)
+                line.append(hit.id)
+                line.append(hit.seq_len)
+                line.append(hsp.hit_start)
+                line.append(hsp.hit_end)
+                line.append(len(hsp))
+                line.append(",".join(str(x) for x in block_sizes) + ",")
+                line.append(",".join(str(x) for x in qstarts) + ",")
+                line.append(",".join(str(x) for x in hstarts) + ",")
+
+                if self.pslx:
+                    line.append(",".join(str(x.seq) for x in hsp.query_all) + ",")
+                    line.append(",".join(str(x.seq) for x in hsp.hit_all) + ",")
+
+                qresult_lines.append("\t".join(str(x) for x in line))
+
+        return "\n".join(qresult_lines) + "\n"
+
+
+# if not used as a module, run the doctest
+if __name__ == "__main__":
+    from Bio._utils import run_doctest
+
+    run_doctest()
diff --git a/code/lib/Bio/SearchIO/ExonerateIO/__init__.py b/code/lib/Bio/SearchIO/ExonerateIO/__init__.py
new file mode 100644
index 0000000..7aaa9f5
--- /dev/null
+++ b/code/lib/Bio/SearchIO/ExonerateIO/__init__.py
@@ -0,0 +1,252 @@
+# Copyright 2012 by Wibowo Arindrarto. All rights reserved.
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Bio.SearchIO support for Exonerate output formats.
+
+This module adds support for handling Exonerate outputs. Exonerate is a generic
+tool for pairwise sequence comparison that allows you to align sequences using
+several different models.
+
+Bio.SearchIO.ExonerateIO was tested on the following Exonerate versions and
+models:
+
+  - version: 2.2
+  - models:
+    - affine:local          - cdna2genome
+    - coding2coding         - est2genome
+    - genome2genome         - ner
+    - protein2dna           - protein2genome
+    - ungapped              - ungapped:translated
+
+Although model testing was not exhaustive, ExonerateIO should be able to cope
+with all Exonerate models. Please file a bug report if you stumble upon an
+unparseable file.
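+
+A minimal parsing sketch using the standard Bio.SearchIO functions (the file
+name 'output.exn' here is a hypothetical example)::
+
+    from Bio import SearchIO
+    for qresult in SearchIO.parse('output.exn', 'exonerate-text'):
+        print(qresult.id, len(qresult))  # query ID and number of hits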
+
+More information on Exonerate is available on its home page at
+www.ebi.ac.uk/~guy/exonerate/
+
+
+Supported Formats
+=================
+
+ - Plain text alignment - 'exonerate-text'   - parsing, indexing
+ - Vulgar line          - 'exonerate-vulgar' - parsing, indexing
+ - Cigar line           - 'exonerate-cigar'  - parsing, indexing
+
+On Exonerate, these output formats are not exclusive to one another. For
+example, you may have both plain text and vulgar output in the same file.
+ExonerateIO can only handle one of these at a time, however. If you have a file
+containing both plain text and vulgar lines, for example, you have to pick
+either 'exonerate-text' or 'exonerate-vulgar' to parse it.
+
+Due to the cigar format specification, many features of the alignments such as
+introns or frameshifts may be collapsed into a single feature (in this case,
+they are labelled 'D' for 'deletion'). The parser does not attempt to guess
+whether the D label it encounters is a real deletion or a collapsed feature.
+As such, parsing or indexing using 'exonerate-cigar' may yield different results
+compared to 'exonerate-text' or 'exonerate-vulgar'.
+
+
+exonerate-text
+==============
+
+The plain text output / C4 alignment is the output triggered by the
+'--showalignment' flag. Compared to the two other output formats, this format
+contains the most information, having the complete query and hit sequences of
+the alignment.
+
+Here are some examples of the C4 output alignment that ExonerateIO can handle
+(coordinates not drawn to scale)::
+
+    1. simple ungapped alignments
+
+           1 : ATGGGCAATATCCTTCGGAAAGGTCAGCAAAT : 56
+               ||||||||||||||||||||||||||||||||
+     1319275 : ATGGGCAATATCCTTCGGAAAGGTCAGCAAAT : 1319220
+
+    2. alignments with frameshifts:
+
+         129 : -TGCCGTTACCAT----GACGAAAGTATTAAT : 160
+               -CysArgTyrHis----AspGluSerIleAsn
+               #||||||||||||####|||||||||||||||
+               #CysArgTyrHis####AspGluSerIleAsn
+     1234593 : GTGCCGTTACCATCGGTGACGAAAGTATTAAT : 1234630
+
+    3. alignments with introns and split codons:
+
+         382 : {A}                             {CC}AAA : 358
+               AAA{T}  >>>> Target Intron 3 >>>>  {hr}LysATGAGCGATGAAAATA
+               || { }++                 55423 bp ++{ } !  ||| ||||||||||
+               AAC{L}gt.........................ag{eu}AspTTGAATGATGAAAATA
+       42322 : {C}                             {TG}GAT : 97769
+
+    4. alignments with NER blocks
+
+         111 : CAGAAAA--<   31  >--CTGCCCAGAAT--<   10  >--AACGAGCGTTCCG- : 184
+               | |||||--< NER 1 >--| ||||| |  |--<  NER 2 >--||| | ||||||-
+      297911 : CTGAAAA--<   29  >--CCGCCCAAAGT--<   13  >--AACTGGAGTTCCG- : 297993
+
+ExonerateIO utilizes the HSPFragment model quite extensively to deal with
+non-ungapped alignments. For any single HSPFragment, if ExonerateIO sees an
+intron, a NER block, or a frameshift, it will break the fragment into two
+HSPFragment objects and adjust each of their start and end coordinates
+appropriately.
+
+You may notice that Exonerate always uses three-letter amino acid codes to
+display protein sequences. If the protein itself is part of the query sequence,
+such as in the protein2dna model, ExonerateIO will transform the protein
+sequence into one-letter codes. This is because the SeqRecord objects that
+store the sequences are designed for single-letter sequences only. If Exonerate
+also outputs the underlying nucleotide sequence, it will be saved into an
+``aln_annotation`` entry as a list of triplets.
+
+If the protein sequence is not part of the actual alignment, such as in the
+est2genome or genome2genome models, ExonerateIO will keep the three-letter
+codes and store them as ``aln_annotation`` entries.
In these cases, the hit and +query sequences may be used directly as SeqRecord objects as they are one-letter +nucleotide codes. The three-letter protein sequences are then stored as entries +in the ``aln_annotation`` dictionary. + + +For 'exonerate-text', ExonerateIO provides the following object attributes: + ++-----------------+-------------------------+----------------------------------+ +| Object | Attribute | Value | ++=================+=========================+==================================+ +| QueryResult | description | query sequence description | +| +-------------------------+----------------------------------+ +| | id | query sequence ID | +| +-------------------------+----------------------------------+ +| | model | alignment model | +| +-------------------------+----------------------------------+ +| | program | 'exonerate' | ++-----------------+-------------------------+----------------------------------+ +| Hit | description | hit sequence description | +| +-------------------------+----------------------------------+ +| | id | hit sequence ID | ++-----------------+-------------------------+----------------------------------+ +| HSP | hit_split_codons | list of split codon coordinates | +| | | in the hit sequence | +| +-------------------------+----------------------------------+ +| | score | alignment score | +| +-------------------------+----------------------------------+ +| | query_split_codons | list of split codon coordinates | +| | | in the query sequence | ++-----------------+-------------------------+----------------------------------+ +| HSPFragment | aln_annotation | alignment similarity string, hit | +| | | sequence annotation, and/or | +| | | query sequence annotation | +| +-------------------------+----------------------------------+ +| | hit | hit sequence | +| +-------------------------+----------------------------------+ +| | hit_end | hit sequence end coordinate | +| +-------------------------+----------------------------------+ +| | hit_frame | hit sequence reading frame | +| +-------------------------+----------------------------------+ +| | hit_start | hit sequence start coordinate | +| +-------------------------+----------------------------------+ +| | hit_strand | hit sequence strand | +| +-------------------------+----------------------------------+ +| | query | query sequence | +| +-------------------------+----------------------------------+ +| | query_end | query sequence end coordinate | +| +-------------------------+----------------------------------+ +| | query_frame | query sequence reading frame | +| +-------------------------+----------------------------------+ +| | query_start | query sequence start coordinate | +| +-------------------------+----------------------------------+ +| | query_strand | query sequence strand | ++-----------------+-------------------------+----------------------------------+ + +Note that you can also use the default HSP or HSPFragment properties. For +example, to check the intron coordinates of your result you can use the +``query_inter_ranges`` or ``hit_inter_ranges`` properties: + + >>> from Bio import SearchIO + >>> fname = 'Exonerate/exn_22_m_genome2genome.exn' + >>> all_qresult = list(SearchIO.parse(fname, 'exonerate-text')) + >>> hsp = all_qresult[-1][-1][-1] # last qresult, last hit, last hsp + >>> hsp + HSP(...) 
+    >>> hsp.query_inter_ranges
+    [(388, 449), (284, 319), (198, 198), (114, 161)]
+    >>> hsp.hit_inter_ranges
+    [(487387, 641682), (386207, 487327), (208677, 386123), (71917, 208639)]
+
+Here you can see that for both query and hit introns, the coordinates
+in each tuple are always (start, end) where start <= end. But when you compare
+each tuple to the next, the coordinates decrease. This is an indication that
+both the query and hit sequences lie on the minus strand. Exonerate outputs
+minus strand results in a decreasing manner; the start coordinate is always
+bigger than the end coordinate. ExonerateIO preserves the fragment ordering as
+a whole, but uses its own standard to store an individual fragment's start and
+end coordinates.
+
+You may also notice that the third tuple in ``query_inter_ranges`` is
+(198, 198), two identical numbers. This means that the query sequence does not
+have any gaps at that position. The gap is only present in the hit sequence,
+where we see that the third tuple contains (208677, 386123), a gap of about
+177k bases.
+
+Another example is to use the ``hit_frame_all`` and ``query_frame_all``
+properties to see if there are any frameshifts in your alignment:
+
+    >>> from Bio import SearchIO
+    >>> fname = 'Exonerate/exn_22_m_coding2coding_fshifts.exn'
+    >>> qresult = next(SearchIO.parse(fname, 'exonerate-text'))
+    >>> hsp = qresult[0][0]  # first hit, first hsp
+    >>> hsp
+    HSP(...)
+    >>> hsp.query_frame_all
+    [1, 2, 2, 2]
+    >>> hsp.hit_frame_all
+    [1, 1, 3, 1]
+
+Here you can see that the alignment as a whole has three frameshifts. The first
+one occurs in the query sequence, after the first fragment (1 -> 2 shift), the
+second one occurs in the hit sequence, after the second fragment (1 -> 3
+shift), and the last one also occurs in the hit sequence, before the last
+fragment (3 -> 1 shift).
+
+There are other default HSP properties that you can use to ease your workflow.
+Please refer to the HSP object documentation for more details.
+
+
+exonerate-vulgar
+================
+
+The vulgar format provides a compact way of representing alignments created by
+Exonerate. In general, it contains the same information as the plain text
+output except for the 'model' information and the actual sequences themselves.
+You can expect the coordinates obtained from 'exonerate-text' and
+'exonerate-vulgar' to be the same. Both formats also create HSPFragment
+objects using the same triggers: introns, NER blocks, and/or frameshifts.
+
+
+exonerate-cigar
+===============
+
+The cigar format provides an even more compact representation of Exonerate
+alignments. However, this comes at the cost of losing information. In the
+cigar format, for example, introns are treated as simple deletions. This makes
+it impossible for the parser to distinguish between simple deletions and
+intron regions. As such, 'exonerate-cigar' may produce different sets of
+coordinates and fragments compared to 'exonerate-vulgar' or 'exonerate-text'.
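+
+Whichever of the three formats you pick, indexed (random) access works the
+same way through the standard Bio.SearchIO functions. A minimal sketch,
+assuming a hypothetical file name and query ID::
+
+    from Bio import SearchIO
+    idx = SearchIO.index('output.exn', 'exonerate-cigar')
+    qresult = idx['my_query_id']  # keys are query IDs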
+ +""" + +# Known issues & gotchas: +# - The cigar parser does not use the extended cigar string; only supports MID +# - Cigar and vulgar parsing results will most likely be different, due to the +# different type of data stored by both formats + +from .exonerate_text import ExonerateTextParser, ExonerateTextIndexer +from .exonerate_vulgar import ExonerateVulgarParser, ExonerateVulgarIndexer +from .exonerate_cigar import ExonerateCigarParser, ExonerateCigarIndexer + + +# if not used as a module, run the doctest +if __name__ == "__main__": + from Bio._utils import run_doctest + + run_doctest() diff --git a/code/lib/Bio/SearchIO/ExonerateIO/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/SearchIO/ExonerateIO/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..f32a000 Binary files /dev/null and b/code/lib/Bio/SearchIO/ExonerateIO/__pycache__/__init__.cpython-37.pyc differ diff --git a/code/lib/Bio/SearchIO/ExonerateIO/__pycache__/_base.cpython-37.pyc b/code/lib/Bio/SearchIO/ExonerateIO/__pycache__/_base.cpython-37.pyc new file mode 100644 index 0000000..8757a74 Binary files /dev/null and b/code/lib/Bio/SearchIO/ExonerateIO/__pycache__/_base.cpython-37.pyc differ diff --git a/code/lib/Bio/SearchIO/ExonerateIO/__pycache__/exonerate_cigar.cpython-37.pyc b/code/lib/Bio/SearchIO/ExonerateIO/__pycache__/exonerate_cigar.cpython-37.pyc new file mode 100644 index 0000000..7aec49b Binary files /dev/null and b/code/lib/Bio/SearchIO/ExonerateIO/__pycache__/exonerate_cigar.cpython-37.pyc differ diff --git a/code/lib/Bio/SearchIO/ExonerateIO/__pycache__/exonerate_text.cpython-37.pyc b/code/lib/Bio/SearchIO/ExonerateIO/__pycache__/exonerate_text.cpython-37.pyc new file mode 100644 index 0000000..9c0fe21 Binary files /dev/null and b/code/lib/Bio/SearchIO/ExonerateIO/__pycache__/exonerate_text.cpython-37.pyc differ diff --git a/code/lib/Bio/SearchIO/ExonerateIO/__pycache__/exonerate_vulgar.cpython-37.pyc b/code/lib/Bio/SearchIO/ExonerateIO/__pycache__/exonerate_vulgar.cpython-37.pyc new file mode 100644 index 0000000..fab1cfe Binary files /dev/null and b/code/lib/Bio/SearchIO/ExonerateIO/__pycache__/exonerate_vulgar.cpython-37.pyc differ diff --git a/code/lib/Bio/SearchIO/ExonerateIO/_base.py b/code/lib/Bio/SearchIO/ExonerateIO/_base.py new file mode 100644 index 0000000..190f80a --- /dev/null +++ b/code/lib/Bio/SearchIO/ExonerateIO/_base.py @@ -0,0 +1,534 @@ +# Copyright 2012 by Wibowo Arindrarto. All rights reserved. +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. 
+"""Bio.SearchIO abstract base parser for Exonerate standard output format.""" + +import re +from functools import reduce +from abc import ABC, abstractmethod + +from Bio.SearchIO._index import SearchIndexer +from Bio.SearchIO._model import QueryResult, Hit, HSP, HSPFragment +from Bio.SeqUtils import seq1 + + +# strand char-value mapping +_STRAND_MAP = {"+": 1, "-": -1, ".": 0} + +_RE_SHIFTS = re.compile(r"(#+)") +# regex for checking whether a vulgar line has protein/translated components +_RE_TRANS = re.compile(r"[53ISCF]") + + +def _set_frame(frag): + """Set the HSPFragment frames (PRIVATE).""" + frag.hit_frame = (frag.hit_start % 3 + 1) * frag.hit_strand + frag.query_frame = (frag.query_start % 3 + 1) * frag.query_strand + + +def _make_triplets(seq, phase=0): + """Select a valid amino acid sequence given a 3-letter code input (PRIVATE). + + This function takes a single three-letter amino acid sequence and the phase + of the sequence to return the longest intact amino acid sequence possible. + Parts of the input sequence before and after the selected sequence are also + returned. + + This is an internal private function and is meant for parsing Exonerate's + three-letter amino acid output. + + >>> from Bio.SearchIO.ExonerateIO._base import _make_triplets + >>> _make_triplets('GlyThrSerAlaPro') + ('', ['Gly', 'Thr', 'Ser', 'Ala', 'Pro'], '') + >>> _make_triplets('yThrSerAla', phase=1) + ('y', ['Thr', 'Ser', 'Ala'], '') + >>> _make_triplets('yThrSerAlaPr', phase=1) + ('y', ['Thr', 'Ser', 'Ala'], 'Pr') + + """ + pre = seq[:phase] + np_seq = seq[phase:] + non_triplets = len(np_seq) % 3 + post = "" if not non_triplets else np_seq[-1 * non_triplets :] + intacts = [np_seq[3 * i : 3 * (i + 1)] for i in range(len(np_seq) // 3)] + return pre, intacts, post + + +def _get_fragments_coord(frags): + """Return the letter coordinate of the given list of fragments (PRIVATE). + + This function takes a list of three-letter amino acid sequences and + returns a list of coordinates for each fragment had all the input + sequences been flattened. + + This is an internal private function and is meant for parsing Exonerate's + three-letter amino acid output. + + >>> from Bio.SearchIO.ExonerateIO._base import _get_fragments_coord + >>> _get_fragments_coord(['Thr', 'Ser', 'Ala']) + [0, 3, 6] + >>> _get_fragments_coord(['Thr', 'SerAlaPro', 'GlyLeu']) + [0, 3, 12] + >>> _get_fragments_coord(['Thr', 'SerAlaPro', 'GlyLeu', 'Cys']) + [0, 3, 12, 18] + + """ + if not frags: + return [] + # first fragment always starts from position 0 + init = [0] + return reduce(lambda acc, frag: acc + [acc[-1] + len(frag)], frags[:-1], init) + + +def _get_fragments_phase(frags): + """Return the phases of the given list of 3-letter amino acid fragments (PRIVATE). + + This is an internal private function and is meant for parsing Exonerate's + three-letter amino acid output. + + >>> from Bio.SearchIO.ExonerateIO._base import _get_fragments_phase + >>> _get_fragments_phase(['Thr', 'Ser', 'Ala']) + [0, 0, 0] + >>> _get_fragments_phase(['ThrSe', 'rAla']) + [0, 1] + >>> _get_fragments_phase(['ThrSe', 'rAlaLeu', 'ProCys']) + [0, 1, 0] + >>> _get_fragments_phase(['ThrSe', 'rAlaLeuP', 'roCys']) + [0, 1, 2] + >>> _get_fragments_phase(['ThrSe', 'rAlaLeuPr', 'oCys']) + [0, 1, 1] + + """ + return [(3 - (x % 3)) % 3 for x in _get_fragments_coord(frags)] + + +def _adjust_aa_seq(fraglist): + """Transform 3-letter AA codes of input fragments to one-letter codes (PRIVATE). + + Argument fraglist should be a list of HSPFragments objects. 
+ """ + custom_map = {"***": "*", "<->": "-"} + hsp_hstart = fraglist[0].hit_start + hsp_qstart = fraglist[0].query_start + frag_phases = _get_fragments_phase(fraglist) + for frag, phase in zip(fraglist, frag_phases): + assert frag.query_strand == 0 or frag.hit_strand == 0 + # hit step may be -1 as we're aligning to DNA + hstep = 1 if frag.hit_strand >= 0 else -1 + + # set fragment phase + frag.phase = phase + + # fragment should have a length that is a multiple of 3 + # assert len(frag) % 3 == 0 + qseq = str(frag.query.seq) + q_triplets_pre, q_triplets, q_triplets_post = _make_triplets(qseq, phase) + + hseq = str(frag.hit.seq) + h_triplets_pre, h_triplets, h_triplets_post = _make_triplets(hseq, phase) + + # get one letter codes + # and replace gap codon markers and termination characters + hseq1_pre = "X" if h_triplets_pre else "" + hseq1_post = "X" if h_triplets_post else "" + hseq1 = seq1("".join(h_triplets), custom_map=custom_map) + hstart = hsp_hstart + (len(hseq1_pre) * hstep) + hend = hstart + len(hseq1.replace("-", "")) * hstep + + qseq1_pre = "X" if q_triplets_pre else "" + qseq1_post = "X" if q_triplets_post else "" + qseq1 = seq1("".join(q_triplets), custom_map=custom_map) + qstart = hsp_qstart + len(qseq1_pre) + qend = qstart + len(qseq1.replace("-", "")) + + # replace the old frag sequences with the new ones + frag.hit = None + frag.query = None + frag.hit = hseq1_pre + hseq1 + hseq1_post + frag.query = qseq1_pre + qseq1 + qseq1_post + + # set coordinates for the protein sequence + if frag.query_strand == 0: + frag.query_start, frag.query_end = qstart, qend + elif frag.hit_strand == 0: + frag.hit_start, frag.hit_end = hstart, hend + + # update alignment annotation + # by turning them into list of triplets + for annot, annotseq in frag.aln_annotation.items(): + pre, intact, post = _make_triplets(annotseq, phase) + frag.aln_annotation[annot] = ( + list(filter(None, [pre])) + intact + list(filter(None, [post])) + ) + + # update values for next iteration + hsp_hstart, hsp_qstart = hend, qend + + return fraglist + + +def _split_fragment(frag): + """Split one HSPFragment containing frame-shifted alignment into two (PRIVATE).""" + # given an HSPFragment object with frameshift(s), this method splits it + # into fragments without frameshifts by sequentially chopping it off + # starting from the beginning + simil = frag.aln_annotation["similarity"] + # we should have at least 1 frame shift for splitting + assert simil.count("#") > 0 + + split_frags = [] + qstep = 1 if frag.query_strand >= 0 else -1 + hstep = 1 if frag.hit_strand >= 0 else -1 + qpos = min(frag.query_range) if qstep >= 0 else max(frag.query_range) + hpos = min(frag.hit_range) if qstep >= 0 else max(frag.hit_range) + abs_pos = 0 + # split according to hit, then query + while simil: + + try: + shifts = re.search(_RE_SHIFTS, simil).group(1) + s_start = simil.find(shifts) + s_stop = s_start + len(shifts) + split = frag[abs_pos : abs_pos + s_start] + except AttributeError: # no '#' in simil, i.e. 
last frag + shifts = "" + s_start = 0 + s_stop = len(simil) + split = frag[abs_pos:] + + # coordinates for the split strand + qstart, hstart = qpos, hpos + qpos += ( + len(split) - sum(split.query.seq.count(x) for x in ("-", "<", ">")) + ) * qstep + hpos += ( + len(split) - sum(split.hit.seq.count(x) for x in ("-", "<", ">")) + ) * hstep + + split.hit_start = min(hstart, hpos) + split.query_start = min(qstart, qpos) + split.hit_end = max(hstart, hpos) + split.query_end = max(qstart, qpos) + + # account for frameshift length + abs_slice = slice(abs_pos + s_start, abs_pos + s_stop) + if len(frag.aln_annotation) == 2: + seqs = (frag[abs_slice].query.seq, frag[abs_slice].hit.seq) + elif len(frag.aln_annotation) == 3: + seqs = ( + frag[abs_slice].aln_annotation["query_annotation"], + frag[abs_slice].aln_annotation["hit_annotation"], + ) + if "#" in seqs[0]: + qpos += len(shifts) * qstep + elif "#" in seqs[1]: + hpos += len(shifts) * hstep + + # set frame + _set_frame(split) + split_frags.append(split) + # set similarity string and absolute position for the next loop + simil = simil[s_stop:] + abs_pos += s_stop + + return split_frags + + +def _create_hsp(hid, qid, hspd): + """Return a list of HSP objects from the given parsed HSP values (PRIVATE).""" + frags = [] + # we are iterating over query_ranges, but hit_ranges works just as well + for idx, qcoords in enumerate(hspd["query_ranges"]): + # get sequences, create object + hseqlist = hspd.get("hit") + hseq = "" if hseqlist is None else hseqlist[idx] + qseqlist = hspd.get("query") + qseq = "" if qseqlist is None else qseqlist[idx] + frag = HSPFragment(hid, qid, hit=hseq, query=qseq) + # coordinates + frag.query_start = qcoords[0] + frag.query_end = qcoords[1] + frag.hit_start = hspd["hit_ranges"][idx][0] + frag.hit_end = hspd["hit_ranges"][idx][1] + # alignment annotation + try: + aln_annot = hspd.get("aln_annotation", {}) + for key, value in aln_annot.items(): + frag.aln_annotation[key] = value[idx] + except IndexError: + pass + # strands + frag.query_strand = hspd["query_strand"] + frag.hit_strand = hspd["hit_strand"] + # and append the hsp object to the list + if frag.aln_annotation.get("similarity") is not None: + if "#" in frag.aln_annotation["similarity"]: + frags.extend(_split_fragment(frag)) + continue + # try to set frame if there are translation in the alignment + if ( + len(frag.aln_annotation) > 1 + or frag.query_strand == 0 + or ("vulgar_comp" in hspd and re.search(_RE_TRANS, hspd["vulgar_comp"])) + ): + _set_frame(frag) + + frags.append(frag) + + # if the query is protein, we need to change the hit and query sequences + # from three-letter amino acid codes to one letter, and adjust their + # coordinates accordingly + if len(frags[0].aln_annotation) == 2: # 2 annotations == protein query + frags = _adjust_aa_seq(frags) + + hsp = HSP(frags) + # set hsp-specific attributes + for attr in ( + "score", + "hit_split_codons", + "query_split_codons", + "model", + "vulgar_comp", + "cigar_comp", + "molecule_type", + ): + if attr in hspd: + setattr(hsp, attr, hspd[attr]) + + return hsp + + +def _parse_hit_or_query_line(line): + """Parse the 'Query:' line of exonerate alignment outputs (PRIVATE).""" + try: + mark, id, desc = line.split(" ", 2) + except ValueError: # no desc + mark, id = line.split(" ", 1) + desc = "" + + return id, desc + + +class _BaseExonerateParser(ABC): + """Abstract base class iterator for exonerate format.""" + + _ALN_MARK = None + + def __init__(self, handle): + self.handle = handle + self.has_c4_alignment = False + + 
def __iter__(self): + # read line until the first alignment block or cigar/vulgar lines + while True: + self.line = self.handle.readline() + # flag for human-readable alignment block + if self.line.startswith("C4 Alignment:") and not self.has_c4_alignment: + self.has_c4_alignment = True + if ( + self.line.startswith("C4 Alignment:") + or self.line.startswith("vulgar:") + or self.line.startswith("cigar:") + ): + break + elif not self.line or self.line.startswith("-- completed "): + return + + for qresult in self._parse_qresult(): + qresult.program = "exonerate" + # HACK: so that all descriptions are set + qresult.description = qresult.description + for hit in qresult: + hit.description = hit.description + yield qresult + + def read_until(self, bool_func): + """Read the file handle until the given bool function returns True.""" + while True: + if not self.line or bool_func(self.line): + return + else: + self.line = self.handle.readline() + + @abstractmethod + def parse_alignment_block(self, header): + raise NotImplementedError + + def _parse_alignment_header(self): + # read all header lines and store them + aln_header = [] + # header is everything before the first empty line + while self.line.strip(): + aln_header.append(self.line.strip()) + self.line = self.handle.readline() + # then parse them + qresult, hit, hsp = {}, {}, {} + for line in aln_header: + # query line + if line.startswith("Query:"): + qresult["id"], qresult["description"] = _parse_hit_or_query_line(line) + # target line + elif line.startswith("Target:"): + hit["id"], hit["description"] = _parse_hit_or_query_line(line) + # model line + elif line.startswith("Model:"): + qresult["model"] = line.split(" ", 1)[1] + # score line + elif line.startswith("Raw score:"): + hsp["score"] = line.split(" ", 2)[2] + # query range line + elif line.startswith("Query range:"): + # line is always 'Query range: \d+ -> \d+', so we can pluck + # the numbers directly + hsp["query_start"], hsp["query_end"] = line.split(" ", 4)[2:5:2] + # hit range line + elif line.startswith("Target range:"): + # same logic with query range + hsp["hit_start"], hsp["hit_end"] = line.split(" ", 4)[2:5:2] + + # determine strand + if qresult["description"].endswith(":[revcomp]"): + hsp["query_strand"] = "-" + qresult["description"] = qresult["description"].replace(":[revcomp]", "") + elif "protein2" in qresult["model"]: + hsp["query_strand"] = "." + else: + hsp["query_strand"] = "+" + if hit["description"].endswith(":[revcomp]"): + hsp["hit_strand"] = "-" + hit["description"] = hit["description"].replace(":[revcomp]", "") + elif "2protein" in qresult["model"]: + hsp["hit_strand"] = "." 
+ else: + hsp["hit_strand"] = "+" + + # NOTE: we haven't processed the coordinates types + # and the strands are not yet Biopython's standard (1 / -1 / 0) + # since it's easier if we do the conversion later + + return {"qresult": qresult, "hit": hit, "hsp": hsp} + + def _parse_qresult(self): + # state values + state_EOF = 0 + state_QRES_NEW = 1 + state_QRES_SAME = 3 + state_HIT_NEW = 2 + state_HIT_SAME = 4 + # initial dummies + qres_state, hit_state = None, None + file_state = None + cur_qid, cur_hid = None, None + prev_qid, prev_hid = None, None + cur, prev = None, None + hit_list, hsp_list = [], [] + # if the file has c4 alignments, use that as the alignment mark + if self.has_c4_alignment: + self._ALN_MARK = "C4 Alignment:" + + while True: + self.read_until(lambda line: line.startswith(self._ALN_MARK)) + if cur is not None: + prev = cur + prev_qid = cur_qid + prev_hid = cur_hid + # only parse the result row if it's not EOF + if self.line: + assert self.line.startswith(self._ALN_MARK), self.line + # create temp dicts for storing parsed values + header = {"qresult": {}, "hit": {}, "hsp": {}} + # if the file has c4 alignments, try to parse the header + if self.has_c4_alignment: + self.read_until(lambda line: line.strip().startswith("Query:")) + header = self._parse_alignment_header() + # parse the block contents + cur = self.parse_alignment_block(header) + cur_qid = cur["qresult"]["id"] + cur_hid = cur["hit"]["id"] + elif not self.line or self.line.startswith("-- completed "): + file_state = state_EOF + cur_qid, cur_hid = None, None + + # get the state of hit and qresult + if prev_qid != cur_qid: + qres_state = state_QRES_NEW + else: + qres_state = state_QRES_SAME + # new hits are hits with different ids or hits in a new query + if prev_hid != cur_hid or qres_state == state_QRES_NEW: + hit_state = state_HIT_NEW + else: + hit_state = state_HIT_SAME + + if prev is not None: + hsp = _create_hsp(prev_hid, prev_qid, prev["hsp"]) + hsp_list.append(hsp) + + if hit_state == state_HIT_NEW: + hit = Hit(hsp_list) + for attr, value in prev["hit"].items(): + setattr(hit, attr, value) + hit_list.append(hit) + hsp_list = [] + + if qres_state == state_QRES_NEW or file_state == state_EOF: + qresult = QueryResult(id=prev_qid) + for hit in hit_list: + # not using append since Exonerate may separate the + # same hit if it has different strands + qresult.absorb(hit) + for attr, value in prev["qresult"].items(): + setattr(qresult, attr, value) + yield qresult + if file_state == state_EOF: + break + hit_list = [] + + # only readline() here if we're not parsing C4 alignments + # C4 alignments readline() is handled by its parse_alignment_block + # function + if not self.has_c4_alignment: + self.line = self.handle.readline() + + +class _BaseExonerateIndexer(SearchIndexer): + """Indexer class for Exonerate plain text.""" + + _parser = None # should be defined by subclass + _query_mark = None # this one too + + def get_qresult_id(self, pos): + raise NotImplementedError("Should be defined by subclass") + + def __iter__(self): + """Iterate over the file handle; yields key, start offset, and length.""" + handle = self._handle + handle.seek(0) + qresult_key = None + + while True: + start_offset = handle.tell() + line = handle.readline() + if line.startswith(self._query_mark): + if qresult_key is None: + qresult_key = self.get_qresult_id(start_offset) + qresult_offset = start_offset + else: + curr_key = self.get_qresult_id(start_offset) + if curr_key != qresult_key: + yield qresult_key, qresult_offset, start_offset - 
qresult_offset + qresult_key = curr_key + qresult_offset = start_offset + handle.seek(qresult_offset) + elif not line: + yield qresult_key, qresult_offset, start_offset - qresult_offset + break + + +# if not used as a module, run the doctest +if __name__ == "__main__": + from Bio._utils import run_doctest + + run_doctest() diff --git a/code/lib/Bio/SearchIO/ExonerateIO/exonerate_cigar.py b/code/lib/Bio/SearchIO/ExonerateIO/exonerate_cigar.py new file mode 100644 index 0000000..7ba8a08 --- /dev/null +++ b/code/lib/Bio/SearchIO/ExonerateIO/exonerate_cigar.py @@ -0,0 +1,109 @@ +# Copyright 2012 by Wibowo Arindrarto. All rights reserved. +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +"""Bio.SearchIO parser for Exonerate cigar output format.""" + +import re + +from ._base import _BaseExonerateParser, _STRAND_MAP +from .exonerate_vulgar import ExonerateVulgarIndexer + + +__all__ = ("ExonerateCigarParser", "ExonerateCigarIndexer") + + +# precompile regex +_RE_CIGAR = re.compile( + r"""^cigar:\s+ + (\S+)\s+(\d+)\s+(\d+)\s+([\+-\.])\s+ # query: ID, start, end, strand + (\S+)\s+(\d+)\s+(\d+)\s+([\+-\.])\s+ # hit: ID, start, end, strand + (\d+)(\s+.*)$ # score, vulgar components + """, + re.VERBOSE, +) + + +class ExonerateCigarParser(_BaseExonerateParser): + """Parser for Exonerate cigar strings.""" + + _ALN_MARK = "cigar" + + def parse_alignment_block(self, header): + """Parse alignment block for cigar format, return query results, hits, hsps.""" + qresult = header["qresult"] + hit = header["hit"] + hsp = header["hsp"] + self.read_until(lambda line: line.startswith("cigar")) + cigars = re.search(_RE_CIGAR, self.line) + # if the file has c4 alignments + # check if cigar values match our previously parsed header values + if self.has_c4_alignment: + assert qresult["id"] == cigars.group(1) + assert hsp["query_start"] == cigars.group(2) + assert hsp["query_end"] == cigars.group(3) + assert hsp["query_strand"] == cigars.group(4) + assert hit["id"] == cigars.group(5) + assert hsp["hit_start"] == cigars.group(6) + assert hsp["hit_end"] == cigars.group(7) + assert hsp["hit_strand"] == cigars.group(8) + assert hsp["score"] == cigars.group(9) + else: + qresult["id"] = cigars.group(1) + hsp["query_start"] = cigars.group(2) + hsp["query_end"] = cigars.group(3) + hsp["query_strand"] = cigars.group(4) + hit["id"] = cigars.group(5) + hsp["hit_start"] = cigars.group(6) + hsp["hit_end"] = cigars.group(7) + hsp["hit_strand"] = cigars.group(8) + hsp["score"] = cigars.group(9) + + # adjust strands + hsp["query_strand"] = _STRAND_MAP[hsp["query_strand"]] + hsp["hit_strand"] = _STRAND_MAP[hsp["hit_strand"]] + # cast coords into ints + qstart = int(hsp["query_start"]) + qend = int(hsp["query_end"]) + hstart = int(hsp["hit_start"]) + hend = int(hsp["hit_end"]) + # set coords (start <= end) + hsp["query_start"] = min(qstart, qend) + hsp["query_end"] = max(qstart, qend) + hsp["hit_start"] = min(hstart, hend) + hsp["hit_end"] = max(hstart, hend) + # cast score into int + hsp["score"] = int(hsp["score"]) + # store cigar components + hsp["cigar_comp"] = cigars.group(10) + # HACK: since we can't really figure out exactly when a + # HSP starts or ends, we set the entire alignment as one HSP + hsp["query_ranges"] = [(hsp["query_start"], hsp["query_end"])] + hsp["hit_ranges"] = [(hsp["hit_start"], hsp["hit_end"])] + + return {"qresult": 
qresult, "hit": hit, "hsp": hsp} + + +class ExonerateCigarIndexer(ExonerateVulgarIndexer): + """Indexer class for exonerate cigar lines.""" + + _parser = ExonerateCigarParser + _query_mark = b"cigar" + + def get_qresult_id(self, pos): + """Return the query ID of the nearest cigar line.""" + handle = self._handle + handle.seek(pos) + # get line, check if it's a vulgar line, and get query ID + line = handle.readline() + assert line.startswith(self._query_mark), line + id = re.search(_RE_CIGAR, line.decode()) + return id.group(1) + + +# if not used as a module, run the doctest +if __name__ == "__main__": + from Bio._utils import run_doctest + + run_doctest() diff --git a/code/lib/Bio/SearchIO/ExonerateIO/exonerate_text.py b/code/lib/Bio/SearchIO/ExonerateIO/exonerate_text.py new file mode 100644 index 0000000..b53e1e4 --- /dev/null +++ b/code/lib/Bio/SearchIO/ExonerateIO/exonerate_text.py @@ -0,0 +1,540 @@ +# Copyright 2012 by Wibowo Arindrarto. All rights reserved. +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +"""Bio.SearchIO parser for Exonerate plain text output format.""" + +import re +from itertools import chain + + +from ._base import ( + _BaseExonerateParser, + _BaseExonerateIndexer, + _STRAND_MAP, + _parse_hit_or_query_line, +) +from .exonerate_vulgar import _RE_VULGAR + + +__all__ = ("ExonerateTextParser", "ExonerateTextIndexer") + + +# for capturing sequences in alignment blocks +# e.g. ' 529 : ATCCCTTATCTCTTTATCTTGTA : 472' +_RE_ALN_ROW = re.compile(r"\s*\d+\s+: (.*) :\s+\d+") +# for splitting the line based on intron annotations +# e.g. ' >>>> Target Intron 1 >>>> ' or 'gt.........................ag' +_RE_EXON = re.compile( + r"[atgc ]{2,}?(?:(?:[<>]+ \w+ Intron \d+ [<>]+)|(?:\.+))[atgc ]{2,}?" +) +# captures the intron length +# from e.g. '61 bp // 154295 bp' (joint intron lengths) or '177446 bp' +_RE_EXON_LEN = re.compile(r"(?:(\d+) bp // (\d+) bp)|(?:(\d+) bp)") +# for splitting lines in the NER model +_RE_NER = re.compile(r"--<\s+\d+\s+>--") +# for capturing NER gap lengths +_RE_NER_LEN = re.compile(r"--<\s+(\d+)\s+>--") +# regexes for capturing the letters inside curly braces +# no. of letters is either 1 or 2, since they are split codons +_RE_SCODON_START = re.compile(r"\{(\w{1,2})\}$") +_RE_SCODON_END = re.compile(r"^\{(\w{1,2})\}") + + +def _flip_codons(codon_seq, target_seq): + """Flips the codon characters from one seq to another (PRIVATE).""" + a, b = "", "" + for char1, char2 in zip(codon_seq, target_seq): + # no need to do anything if the codon seq line has nothing + if char1 == " ": + a += char1 + b += char2 + else: + a += char2 + b += char1 + + return a, b + + +def _get_block_coords(parsed_seq, row_dict, has_ner=False): + """Return a list of start, end coordinates for each given block in the sequence (PRIVATE).""" + start = 0 + coords = [] + if not has_ner: + splitter = _RE_EXON + else: + splitter = _RE_NER + + # use the query line for reference + seq = parsed_seq[row_dict["query"]] + + for block in re.split(splitter, seq): + start += seq[start:].find(block) + end = start + len(block) + coords.append((start, end)) + + return coords + + +def _get_inter_coords(coords, strand=1): + """Return list of pairs covering intervening ranges (PRIVATE). + + From the given pairs of coordinates, returns a list of pairs + covering the intervening ranges. 
+ """ + # adapted from Python's itertools guide + # if strand is -1, adjust coords to the ends and starts are chained + if strand == -1: + sorted_coords = [(max(a, b), min(a, b)) for a, b in coords] + inter_coords = list(chain(*sorted_coords))[1:-1] + return list(zip(inter_coords[1::2], inter_coords[::2])) + else: + inter_coords = list(chain(*coords))[1:-1] + return list(zip(inter_coords[::2], inter_coords[1::2])) + + +def _stitch_rows(raw_rows): + """Stitches together the parsed alignment rows and returns them in a list (PRIVATE).""" + # deal with possible codon surprise! + # (i.e. alignments with codons using cdna2genome model) + # by creating additional rows to contain the codons + try: + max_len = max(len(x) for x in raw_rows) + for row in raw_rows: + assert len(row) == max_len + except AssertionError: + for idx, row in enumerate(raw_rows): + if len(row) != max_len: + # codons must be present in the query and hit (so +2) + assert len(row) + 2 == max_len + # add additional empty lines to contain codons + raw_rows[idx] = [" " * len(row[0])] + row + [" " * len(row[0])] + + cmbn_rows = [] + for idx, row in enumerate(raw_rows[0]): + cmbn_row = "".join(aln_row[idx] for aln_row in raw_rows) + cmbn_rows.append(cmbn_row) + + # the real aligned sequence is always the 'outer' one, so we want + # to flip them with their 'inner' pairs + if len(cmbn_rows) == 5: + # flip query sequence + cmbn_rows[0], cmbn_rows[1] = _flip_codons(cmbn_rows[0], cmbn_rows[1]) + # flip hit sequence + cmbn_rows[4], cmbn_rows[3] = _flip_codons(cmbn_rows[4], cmbn_rows[3]) + + return cmbn_rows + + +def _get_row_dict(row_len, model): + """Return a dictionary of row indices for parsing alignment blocks (PRIVATE).""" + idx = {} + # 3 lines, usually in dna vs dna models + if row_len == 3: + idx["query"] = 0 + idx["midline"] = 1 + idx["hit"] = 2 + idx["qannot"], idx["hannot"] = None, None + # 4 lines, in protein vs dna models or dna vs protein models + # TODO: currently we check this from the model string; is there + # a better way to do it? 
+ elif row_len == 4: + if "protein2" in model: + idx["query"] = 0 + idx["midline"] = 1 + idx["hit"] = 2 + idx["hannot"] = 3 + idx["qannot"] = None + elif "2protein" in model: + idx["query"] = 1 + idx["midline"] = 2 + idx["hit"] = 3 + idx["hannot"] = None + idx["qannot"] = 0 + else: + raise ValueError("Unexpected model: " + model) + # 5 lines, translated dna vs translated dna + elif row_len == 5: + # set sequence indexes + idx["qannot"] = 0 + idx["query"] = 1 + idx["midline"] = 2 + idx["hit"] = 3 + idx["hannot"] = 4 + else: + raise ValueError("Unexpected row count in alignment block: %i" % row_len) + return idx + + +def _get_blocks(rows, coords, idx): + """Return a list of dictionaries of sequences split by the coordinates (PRIVATE).""" + for idx_name in ("query", "hit", "midline", "qannot", "hannot"): + assert idx_name in idx + blocks = [] + for start, end in coords: + block = {} + # get seqs according to index + block["query"] = rows[idx["query"]][start:end] + block["hit"] = rows[idx["hit"]][start:end] + block["similarity"] = rows[idx["midline"]][start:end] + if idx["qannot"] is not None: + block["query_annotation"] = rows[idx["qannot"]][start:end] + if idx["hannot"] is not None: + block["hit_annotation"] = rows[idx["hannot"]][start:end] + blocks.append(block) + + return blocks + + +def _get_scodon_moves(tmp_seq_blocks): + """Get a dictionary of split codon locations relative to each fragment end (PRIVATE).""" + scodon_moves = {"query": [], "hit": []} + for seq_type in scodon_moves: + scoords = [] + for block in tmp_seq_blocks: + # check both ends of the sequence for residues in curly braces + m_start = re.search(_RE_SCODON_START, block[seq_type]) + m_end = re.search(_RE_SCODON_END, block[seq_type]) + if m_start: + m_start = len(m_start.group(1)) + scoords.append((m_start, 0)) + else: + scoords.append((0, 0)) + if m_end: + m_end = len(m_end.group(1)) + scoords.append((0, m_end)) + else: + scoords.append((0, 0)) + scodon_moves[seq_type] = scoords + + return scodon_moves + + +def _clean_blocks(tmp_seq_blocks): + """Remove curly braces (split codon markers) from the given sequences (PRIVATE).""" + seq_blocks = [] + for seq_block in tmp_seq_blocks: + for line_name in seq_block: + seq_block[line_name] = ( + seq_block[line_name].replace("{", "").replace("}", "") + ) + seq_blocks.append(seq_block) + + return seq_blocks + + +def _comp_intron_lens(seq_type, inter_blocks, raw_inter_lens): + """Return the length of introns between fragments (PRIVATE).""" + # set opposite type, for setting introns + opp_type = "hit" if seq_type == "query" else "query" + # list of flags to denote if an intron follows a block + # it reads e.g. 
this line: + # "ATGTT{TT} >>>> Target Intron 1 >>>> {G}TGTGTGTACATT" + # and sets the opposing sequence type's intron (since this + # line is present on the opposite sequence type line) + has_intron_after = ["Intron" in x[seq_type] for x in inter_blocks] + assert len(has_intron_after) == len(raw_inter_lens) + # create list containing coord adjustments incorporating + # intron lengths + inter_lens = [] + for flag, parsed_len in zip(has_intron_after, raw_inter_lens): + if flag: + # joint introns + if all(parsed_len[:2]): + # intron len is [0] if opp_type is query, otherwise it's [1] + intron_len = ( + int(parsed_len[0]) if opp_type == "query" else int(parsed_len[1]) + ) + # single hit/query introns + elif parsed_len[2]: + intron_len = int(parsed_len[2]) + else: + raise ValueError("Unexpected intron parsing result: %r" % parsed_len) + else: + intron_len = 0 + + inter_lens.append(intron_len) + + return inter_lens + + +def _comp_coords(hsp, seq_type, inter_lens): + """Fill the block coordinates of the given hsp dictionary (PRIVATE).""" + assert seq_type in ("hit", "query") + # manually fill the first coord + seq_step = 1 if hsp["%s_strand" % seq_type] >= 0 else -1 + fstart = hsp["%s_start" % seq_type] + # fend is fstart + number of residues in the sequence, minus gaps + fend = ( + fstart + + len(hsp[seq_type][0].replace("-", "").replace(">", "").replace("<", "")) + * seq_step + ) + coords = [(fstart, fend)] + # and start from the second block, after the first inter seq + for idx, block in enumerate(hsp[seq_type][1:]): + bstart = coords[-1][1] + inter_lens[idx] * seq_step + bend = bstart + seq_step * len(block.replace("-", "")) + coords.append((bstart, bend)) + + # adjust the coords so the smallest is [0], if strand is -1 + # couldn't do this in the previous steps since we need the initial + # block ordering + if seq_step != 1: + for idx, coord in enumerate(coords): + coords[idx] = coords[idx][1], coords[idx][0] + + return coords + + +def _comp_split_codons(hsp, seq_type, scodon_moves): + """Compute positions of split codons, store in given HSP dictionary (PRIVATE).""" + scodons = [] + for idx in range(len(scodon_moves[seq_type])): + pair = scodon_moves[seq_type][idx] + if not any(pair): + continue + else: + assert not all(pair) + a, b = pair + anchor_pair = hsp["%s_ranges" % seq_type][idx // 2] + strand = 1 if hsp["%s_strand" % seq_type] >= 0 else -1 + + if a: + func = max if strand == 1 else min + anchor = func(anchor_pair) + start_c, end_c = anchor + a * strand * -1, anchor + elif b: + func = min if strand == 1 else max + anchor = func(anchor_pair) + start_c, end_c = anchor + b * strand, anchor + scodons.append((min(start_c, end_c), max(start_c, end_c))) + + return scodons + + +class ExonerateTextParser(_BaseExonerateParser): + """Parser for Exonerate plain text output.""" + + _ALN_MARK = "C4 Alignment:" + + def parse_alignment_block(self, header): + """Parse alignment block, return query result, hits, hsps.""" + qresult = header["qresult"] + hit = header["hit"] + hsp = header["hsp"] + # check for values that must have been set by previous methods + for val_name in ( + "query_start", + "query_end", + "hit_start", + "hit_end", + "query_strand", + "hit_strand", + ): + assert val_name in hsp, hsp + + # get the alignment rows + # and stitch them so we have the full sequences in single strings + raw_aln_blocks, vulgar_comp = self._read_alignment() + # cmbn_rows still has split codon markers (curly braces) + cmbn_rows = _stitch_rows(raw_aln_blocks) + row_dict = _get_row_dict(len(cmbn_rows), 
qresult["model"]) + # get the sequence blocks + has_ner = "NER" in qresult["model"].upper() + seq_coords = _get_block_coords(cmbn_rows, row_dict, has_ner) + tmp_seq_blocks = _get_blocks(cmbn_rows, seq_coords, row_dict) + # get split codon temp coords for later use + # this result in pairs of base movement for both ends of each row + scodon_moves = _get_scodon_moves(tmp_seq_blocks) + # remove the split codon markers + seq_blocks = _clean_blocks(tmp_seq_blocks) + + # adjust strands + hsp["query_strand"] = _STRAND_MAP[hsp["query_strand"]] + hsp["hit_strand"] = _STRAND_MAP[hsp["hit_strand"]] + # cast coords into ints + hsp["query_start"] = int(hsp["query_start"]) + hsp["query_end"] = int(hsp["query_end"]) + hsp["hit_start"] = int(hsp["hit_start"]) + hsp["hit_end"] = int(hsp["hit_end"]) + # cast score into ints + hsp["score"] = int(hsp["score"]) + # set sequences + hsp["query"] = [x["query"] for x in seq_blocks] + hsp["hit"] = [x["hit"] for x in seq_blocks] + hsp["aln_annotation"] = {} + # set the molecule type + # currently only limited to models with protein queries + if ( + "protein2" in qresult["model"] + or "coding2" in qresult["model"] + or "2protein" in qresult["model"] + ): + hsp["molecule_type"] = "protein" + # get the annotations if they exist + for annot_type in ("similarity", "query_annotation", "hit_annotation"): + try: + hsp["aln_annotation"][annot_type] = [x[annot_type] for x in seq_blocks] + except KeyError: + pass + + # use vulgar coordinates if vulgar line is present and return + # if vulgar_comp is not None: + # hsp = parse_vulgar_comp(hsp, vulgar_comp) + + # return {'qresult': qresult, 'hit': hit, 'hsp': hsp} + + # otherwise we need to get the coordinates from the alignment + # get the intervening blocks first, so we can use them + # to adjust the coordinates + if not has_ner: + # get intervening coordinates and blocks, only if model is not ner + # ner models have a much more simple coordinate calculation + inter_coords = _get_inter_coords(seq_coords) + inter_blocks = _get_blocks(cmbn_rows, inter_coords, row_dict) + # returns a three-component tuple of intron lengths + # first two component filled == intron in hit and query + # last component filled == intron in hit or query + raw_inter_lens = re.findall(_RE_EXON_LEN, cmbn_rows[row_dict["midline"]]) + + # compute start and end coords for each block + for seq_type in ("query", "hit"): + + # ner blocks and intron blocks require different adjustments + if not has_ner: + opp_type = "hit" if seq_type == "query" else "query" + inter_lens = _comp_intron_lens(seq_type, inter_blocks, raw_inter_lens) + else: + # for NER blocks, the length of the inter-fragment gaps is + # written on the same strand, so opp_type is seq_type + opp_type = seq_type + inter_lens = [ + int(x) + for x in re.findall(_RE_NER_LEN, cmbn_rows[row_dict[seq_type]]) + ] + + # check that inter_lens's length is len opp_type block - 1 + if len(inter_lens) != len(hsp[opp_type]) - 1: + raise ValueError( + "Length mismatch: %r vs %r" + % (len(inter_lens), len(hsp[opp_type]) - 1) + ) + # fill the hsp query and hit coordinates + hsp["%s_ranges" % opp_type] = _comp_coords(hsp, opp_type, inter_lens) + # and fill the split codon coordinates, if model != ner + # can't do this in the if-else clause above since we need to + # compute the ranges first + if not has_ner: + hsp["%s_split_codons" % opp_type] = _comp_split_codons( + hsp, opp_type, scodon_moves + ) + + # now that we've finished parsing coords, we can set the hit and start + # coord according to Biopython's convention 
(start <= end) + for seq_type in ("query", "hit"): + if hsp["%s_strand" % seq_type] == -1: + n_start = "%s_start" % seq_type + n_end = "%s_end" % seq_type + hsp[n_start], hsp[n_end] = hsp[n_end], hsp[n_start] + + return {"qresult": qresult, "hit": hit, "hsp": hsp} + + def _read_alignment(self): + """Read the raw alignment block strings, returns them in a list (PRIVATE).""" + raw_aln_blocks = [] + # flag to check whether we're in an alignment row + in_aln_row = False + # flag for vulgar line, if present, we can parse coordinates from it + vulgar_comp = None + while True: + + match = re.search(_RE_ALN_ROW, self.line.strip()) + # if we have a match, set flags and values + if match and not in_aln_row: + start_idx = self.line.index(match.group(1)) + row_len = len(match.group(1)) + in_aln_row = True + raw_aln_block = [] + # if we're in an alignment row, grab the sequence + if in_aln_row: + raw_aln_block.append(self.line[start_idx : start_idx + row_len]) + # reset flags and values if the line matches, we're in an alignment + # row, and there are more than 1 line in rows + if match and in_aln_row and len(raw_aln_block) > 1: + raw_aln_blocks.append(raw_aln_block) + start_idx = None + row_len = None + in_aln_row = False + + self.line = self.handle.readline() + # try to parse vulgar line if present + if self.line.startswith("vulgar"): + vulgar = re.search(_RE_VULGAR, self.line) + vulgar_comp = vulgar.group(10) + if not self.line or self.line.startswith(self._ALN_MARK): + # HACK: this is so that the parse_qresult method does not + # yield the objects before appending the last HSP. We are doing + # this to keep the parser compatible with outputs without + # human-readable alignment outputs. This also relies on the + # fact that repeated readline() always returns '' on EOF. + if not self.line: + self.line = "mock" + break + + return raw_aln_blocks, vulgar_comp + + +class ExonerateTextIndexer(_BaseExonerateIndexer): + """Indexer class for Exonerate plain text.""" + + _parser = ExonerateTextParser + _query_mark = b"C4 Alignment" + + def get_qresult_id(self, pos): + """Return the query ID from the nearest "Query:" line.""" + handle = self._handle + handle.seek(pos) + sentinel = b"Query:" + + while True: + line = handle.readline().strip() + if line.startswith(sentinel): + break + if not line: + raise StopIteration + qid, desc = _parse_hit_or_query_line(line.decode()) + + return qid + + def get_raw(self, offset): + """Return the raw string of a QueryResult object from the given offset.""" + handle = self._handle + handle.seek(offset) + qresult_key = None + qresult_raw = b"" + + while True: + line = handle.readline() + if not line: + break + elif line.startswith(self._query_mark): + cur_pos = handle.tell() + if qresult_key is None: + qresult_key = self.get_qresult_id(cur_pos) + else: + curr_key = self.get_qresult_id(cur_pos) + if curr_key != qresult_key: + break + handle.seek(cur_pos) + qresult_raw += line + + return qresult_raw + + +# if not used as a module, run the doctest +if __name__ == "__main__": + from Bio._utils import run_doctest + + run_doctest() diff --git a/code/lib/Bio/SearchIO/ExonerateIO/exonerate_vulgar.py b/code/lib/Bio/SearchIO/ExonerateIO/exonerate_vulgar.py new file mode 100644 index 0000000..ce342d3 --- /dev/null +++ b/code/lib/Bio/SearchIO/ExonerateIO/exonerate_vulgar.py @@ -0,0 +1,219 @@ +# Copyright 2012 by Wibowo Arindrarto. All rights reserved. 
+# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +"""Bio.SearchIO parser for Exonerate vulgar output format.""" + +import re + +from ._base import _BaseExonerateParser, _BaseExonerateIndexer, _STRAND_MAP + + +__all__ = ("ExonerateVulgarParser", "ExonerateVulgarIndexer") + + +# precompile regex +_RE_VULGAR = re.compile( + r"""^vulgar:\s+ + (\S+)\s+(\d+)\s+(\d+)\s+([\+-\.])\s+ # query: ID, start, end, strand + (\S+)\s+(\d+)\s+(\d+)\s+([\+-\.])\s+ # hit: ID, start, end, strand + (\d+)(\s+.*)$ # score, vulgar components + """, + re.VERBOSE, +) + +_RE_VCOMP = re.compile( + r""" + \s+(\S+) # vulgar label (C/M: codon/match, G: gap, N: ner, 5/3: splice + # site, I: intron, S: split codon, F: frameshift) + \s+(\d+) # how many residues to advance in query sequence + \s+(\d+) # how many residues to advance in hit sequence + """, + re.VERBOSE, +) + + +def parse_vulgar_comp(hsp, vulgar_comp): + """Parse the vulgar components present in the hsp dictionary.""" + # containers for block coordinates + qstarts = [hsp["query_start"]] + qends = [] + hstarts = [hsp["hit_start"]] + hends = [] + # containers for split codons + hsp["query_split_codons"] = [] + hsp["hit_split_codons"] = [] + # containers for ner blocks + hsp["query_ner_ranges"] = [] + hsp["hit_ner_ranges"] = [] + # sentinels for tracking query and hit positions + qpos = hsp["query_start"] + hpos = hsp["hit_start"] + # multiplier for determining sentinel movement + qmove = 1 if hsp["query_strand"] >= 0 else -1 + hmove = 1 if hsp["hit_strand"] >= 0 else -1 + + vcomps = re.findall(_RE_VCOMP, vulgar_comp) + for idx, match in enumerate(vcomps): + label, qstep, hstep = match[0], int(match[1]), int(match[2]) + # check for label, must be recognized + assert label in "MCGF53INS", "Unexpected vulgar label: %r" % label + # match, codon, or gaps + if label in "MCGS": + # if the previous comp is not an MCGS block, it's the + # start of a new block + if vcomps[idx - 1][0] not in "MCGS": + qstarts.append(qpos) + hstarts.append(hpos) + # other labels + # store the values in the hsp dict as a tuple of (start, stop) + # we're not doing anything if the label is in '53IN', as these + # basically tell us what the inter-block coordinates are and + # inter-block coordinates are automatically calculated by + # and HSP property + if label == "S": + # get start and stop from parsed values + qstart, hstart = qpos, hpos + qend = qstart + qstep * qmove + hend = hstart + hstep * hmove + # adjust the start-stop ranges + sqstart, sqend = min(qstart, qend), max(qstart, qend) + shstart, shend = min(hstart, hend), max(hstart, hend) + # split codons + # XXX: is it possible to have a frameshift that introduces + # a codon split? If so, this may need a different treatment.. 
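+            # each 'S' component contributes one piece of a codon that an
+            # intron has interrupted; store its span for both query and hit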
+ hsp["query_split_codons"].append((sqstart, sqend)) + hsp["hit_split_codons"].append((shstart, shend)) + + # move sentinels accordingly + qpos += qstep * qmove + hpos += hstep * hmove + + # append to ends if the next comp is not an MCGS block or + # if it's the last comp + if idx == len(vcomps) - 1 or ( + label in "MCGS" and vcomps[idx + 1][0] not in "MCGS" + ): + qends.append(qpos) + hends.append(hpos) + + # adjust coordinates + for seq_type in ("query_", "hit_"): + strand = hsp[seq_type + "strand"] + # switch coordinates if strand is < 0 + if strand < 0: + # switch the starts and ends + hsp[seq_type + "start"], hsp[seq_type + "end"] = ( + hsp[seq_type + "end"], + hsp[seq_type + "start"], + ) + if seq_type == "query_": + qstarts, qends = qends, qstarts + else: + hstarts, hends = hends, hstarts + + # set start and end ranges + hsp["query_ranges"] = list(zip(qstarts, qends)) + hsp["hit_ranges"] = list(zip(hstarts, hends)) + return hsp + + +class ExonerateVulgarParser(_BaseExonerateParser): + """Parser for Exonerate vulgar strings.""" + + _ALN_MARK = "vulgar" + + def parse_alignment_block(self, header): + """Parse alignment block for vulgar format, return query results, hits, hsps.""" + qresult = header["qresult"] + hit = header["hit"] + hsp = header["hsp"] + self.read_until(lambda line: line.startswith("vulgar")) + vulgars = re.search(_RE_VULGAR, self.line) + # if the file has c4 alignments + # check if vulgar values match our previously parsed header values + if self.has_c4_alignment: + assert qresult["id"] == vulgars.group(1) + assert hsp["query_start"] == vulgars.group(2) + assert hsp["query_end"] == vulgars.group(3) + assert hsp["query_strand"] == vulgars.group(4) + assert hit["id"] == vulgars.group(5) + assert hsp["hit_start"] == vulgars.group(6) + assert hsp["hit_end"] == vulgars.group(7) + assert hsp["hit_strand"] == vulgars.group(8) + assert hsp["score"] == vulgars.group(9) + else: + qresult["id"] = vulgars.group(1) + hsp["query_start"] = vulgars.group(2) + hsp["query_end"] = vulgars.group(3) + hsp["query_strand"] = vulgars.group(4) + hit["id"] = vulgars.group(5) + hsp["hit_start"] = vulgars.group(6) + hsp["hit_end"] = vulgars.group(7) + hsp["hit_strand"] = vulgars.group(8) + hsp["score"] = vulgars.group(9) + + # adjust strands + hsp["hit_strand"] = _STRAND_MAP[hsp["hit_strand"]] + hsp["query_strand"] = _STRAND_MAP[hsp["query_strand"]] + # cast coords into ints + hsp["query_start"] = int(hsp["query_start"]) + hsp["query_end"] = int(hsp["query_end"]) + hsp["hit_start"] = int(hsp["hit_start"]) + hsp["hit_end"] = int(hsp["hit_end"]) + # cast score into int + hsp["score"] = int(hsp["score"]) + # store vulgar line and parse it + # rstrip to remove line endings (otherwise gives errors in Windows) + hsp["vulgar_comp"] = vulgars.group(10).rstrip() + hsp = parse_vulgar_comp(hsp, hsp["vulgar_comp"]) + + return {"qresult": qresult, "hit": hit, "hsp": hsp} + + +class ExonerateVulgarIndexer(_BaseExonerateIndexer): + """Indexer class for exonerate vulgar lines.""" + + _parser = ExonerateVulgarParser + _query_mark = b"vulgar" + + def get_qresult_id(self, pos): + """Return the query ID of the nearest vulgar line.""" + handle = self._handle + handle.seek(pos) + # get line, check if it's a vulgar line, and get query ID + line = handle.readline() + assert line.startswith(self._query_mark), line + id = re.search(_RE_VULGAR, line.decode()) + return id.group(1) + + def get_raw(self, offset): + """Return the raw bytes string of a QueryResult object from the given offset.""" + handle = self._handle + 
handle.seek(offset)
+        qresult_key = None
+        qresult_raw = b""
+
+        while True:
+            line = handle.readline()
+            if not line:
+                break
+            elif line.startswith(self._query_mark):
+                cur_pos = handle.tell() - len(line)
+                if qresult_key is None:
+                    qresult_key = self.get_qresult_id(cur_pos)
+                else:
+                    curr_key = self.get_qresult_id(cur_pos)
+                    if curr_key != qresult_key:
+                        break
+            qresult_raw += line
+
+        return qresult_raw
+
+
+# if not used as a module, run the doctest
+if __name__ == "__main__":
+    from Bio._utils import run_doctest
+
+    run_doctest()
diff --git a/code/lib/Bio/SearchIO/FastaIO.py b/code/lib/Bio/SearchIO/FastaIO.py
new file mode 100644
index 0000000..8f6c227
--- /dev/null
+++ b/code/lib/Bio/SearchIO/FastaIO.py
@@ -0,0 +1,601 @@
+# Adapted from Bio.AlignIO.FastaIO copyright 2008-2011 by Peter Cock.
+# Copyright 2012 by Wibowo Arindrarto.
+# All rights reserved.
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+r"""Bio.SearchIO support for Bill Pearson's FASTA tools.
+
+This module adds support for parsing FASTA outputs. FASTA is a suite of
+programs that finds regions of local or global similarity between protein
+or nucleotide sequences, either by searching databases or identifying
+local duplications.
+
+Bio.SearchIO.FastaIO was tested on the following FASTA flavors and versions:
+
+ - flavors: fasta, ssearch, tfastx
+ - versions: 35, 36
+
+Other flavors and/or versions may introduce some bugs. Please file a bug
+report on Biopython's bug tracker if you encounter such problems.
+
+More information on FASTA is available through these links:
+
+ - Website: http://fasta.bioch.virginia.edu/fasta_www2/fasta_list2.shtml
+ - User guide: http://fasta.bioch.virginia.edu/fasta_www2/fasta_guide.pdf
+
+
+Supported Formats
+=================
+
+Bio.SearchIO.FastaIO supports parsing and indexing FASTA outputs triggered by
+the -m 10 flag. Other formats that mimic other programs (e.g. the BLAST
+tabular format triggered by the -m 8 flag) may be parseable, but only with
+SearchIO's other parsers (in this case, the 'blast-tab' parser).
+
+
+fasta-m10
+=========
+
+Note that in FASTA -m 10 outputs, HSPs from different strands are considered
+to be from different hits. They are listed as two separate entries in the hit
+table. FastaIO recognizes this and will group HSPs with the same hit ID into
+a single Hit object, regardless of strand.
+
+FASTA also sometimes outputs extra sequences adjacent to the HSP match. These
+extra sequences are discarded by FastaIO. Only regions containing the actual
+sequence match are extracted.
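+
+For example, a minimal parsing sketch (``fasta_output.m10`` is only a
+placeholder name for any output file produced with the -m 10 flag)::
+
+    from Bio import SearchIO
+
+    for qresult in SearchIO.parse("fasta_output.m10", "fasta-m10"):
+        for hit in qresult:
+            print(qresult.id, hit.id, hit.hsps[0].evalue)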
+ +The following object attributes are provided: + ++-----------------+-------------------------+----------------------------------+ +| Object | Attribute | Value | ++=================+=========================+==================================+ +| QueryResult | description | query sequence description | +| +-------------------------+----------------------------------+ +| | id | query sequence ID | +| +-------------------------+----------------------------------+ +| | program | FASTA flavor | +| +-------------------------+----------------------------------+ +| | seq_len | full length of query sequence | +| +-------------------------+----------------------------------+ +| | target | target search database | +| +-------------------------+----------------------------------+ +| | version | FASTA version | ++-----------------+-------------------------+----------------------------------+ +| Hit | seq_len | full length of the hit sequence | ++-----------------+-------------------------+----------------------------------+ +| HSP | bitscore | \*_bits line | +| +-------------------------+----------------------------------+ +| | evalue | \*_expect line | +| +-------------------------+----------------------------------+ +| | ident_pct | \*_ident line | +| +-------------------------+----------------------------------+ +| | init1_score | \*_init1 line | +| +-------------------------+----------------------------------+ +| | initn_score | \*_initn line | +| +-------------------------+----------------------------------+ +| | opt_score | \*_opt line, \*_s-w opt line | +| +-------------------------+----------------------------------+ +| | pos_pct | \*_sim line | +| +-------------------------+----------------------------------+ +| | sw_score | \*_score line | +| +-------------------------+----------------------------------+ +| | z_score | \*_z-score line | ++-----------------+-------------------------+----------------------------------+ +| HSPFragment | aln_annotation | al_cons block, if present | +| (also via HSP) +-------------------------+----------------------------------+ +| | hit | hit sequence | +| +-------------------------+----------------------------------+ +| | hit_end | hit sequence end coordinate | +| +-------------------------+----------------------------------+ +| | hit_start | hit sequence start coordinate | +| +-------------------------+----------------------------------+ +| | hit_strand | hit sequence strand | +| +-------------------------+----------------------------------+ +| | query | query sequence | +| +-------------------------+----------------------------------+ +| | query_end | query sequence end coordinate | +| +-------------------------+----------------------------------+ +| | query_start | query sequence start coordinate | +| +-------------------------+----------------------------------+ +| | query_strand | query sequence strand | ++-----------------+-------------------------+----------------------------------+ + +""" + +import re + +from Bio.SearchIO._index import SearchIndexer +from Bio.SearchIO._model import QueryResult, Hit, HSP, HSPFragment + + +__all__ = ("FastaM10Parser", "FastaM10Indexer") + + +# precompile regex patterns +# regex for program name +_RE_FLAVS = re.compile(r"t?fast[afmsxy]|pr[sf][sx]|lalign|[gs]?[glso]search") +# regex for sequence ID and length ~ deals with both \n and \r\n +_PTR_ID_DESC_SEQLEN = r">>>(.+?)\s+(.*?) 
*- (\d+) (?:aa|nt)\s*$" +_RE_ID_DESC_SEQLEN = re.compile(_PTR_ID_DESC_SEQLEN) +_RE_ID_DESC_SEQLEN_IDX = re.compile(_PTR_ID_DESC_SEQLEN.encode()) +# regex for qresult, hit, or hsp attribute value +_RE_ATTR = re.compile(r"^; [a-z]+(_[ \w-]+):\s+(.*)$") +# regex for capturing excess start and end sequences in alignments +_RE_START_EXC = re.compile(r"^-*") +_RE_END_EXC = re.compile(r"-*$") + +# attribute name mappings +_HSP_ATTR_MAP = { + "_initn": ("initn_score", int), + "_init1": ("init1_score", int), + "_opt": ("opt_score", int), + "_s-w opt": ("opt_score", int), + "_z-score": ("z_score", float), + "_bits": ("bitscore", float), + "_expect": ("evalue", float), + "_score": ("sw_score", int), + "_ident": ("ident_pct", float), + "_sim": ("pos_pct", float), +} + +# state flags +_STATE_NONE = 0 +_STATE_QUERY_BLOCK = 1 +_STATE_HIT_BLOCK = 2 +_STATE_CONS_BLOCK = 3 + + +def _set_qresult_hits(qresult, hit_rows=()): + """Append Hits without alignments into QueryResults (PRIVATE).""" + for hit_row in hit_rows: + hit_id, remainder = hit_row.split(" ", 1) + # TODO: parse hit and hsp properties properly; by dealing with: + # - any character in the description (brackets, spaces, etc.) + # - possible [f] or [r] presence (for frame info) + # - possible presence of E2() column + # - possible incomplete hit_id due to column length limit + # The current method only looks at the Hit ID, none of the things above + if hit_id not in qresult: + frag = HSPFragment(hit_id, qresult.id) + hsp = HSP([frag]) + hit = Hit([hsp]) + qresult.append(hit) + + return qresult + + +def _set_hsp_seqs(hsp, parsed, program): + """Set HSPs sequences (PRIVATE). + + :param hsp: HSP whose properties will be set + :type hsp: HSP + :param parsed: parsed values of the HSP attributes + :type parsed: dictionary {string: object} + :param program: program name + :type program: string + + """ + # get aligned sequences and check if they have equal lengths + start = 0 + for seq_type in ("hit", "query"): + if "tfast" not in program: + pseq = parsed[seq_type] + # adjust start and end coordinates based on the amount of + # filler characters + start, stop = _get_aln_slice_coords(pseq) + start_adj = len(re.search(_RE_START_EXC, pseq["seq"]).group(0)) + stop_adj = len(re.search(_RE_END_EXC, pseq["seq"]).group(0)) + start = start + start_adj + stop = stop + start_adj - stop_adj + parsed[seq_type]["seq"] = pseq["seq"][start:stop] + if len(parsed["query"]["seq"]) != len(parsed["hit"]["seq"]): + raise ValueError( + "Length mismatch: %r %r" + % (len(parsed["query"]["seq"]), len(parsed["hit"]["seq"])) + ) + if "similarity" in hsp.aln_annotation: + # only using 'start' since FASTA seems to have trimmed the 'excess' + # end part + hsp.aln_annotation["similarity"] = hsp.aln_annotation["similarity"][start:] + # hit or query works equally well here + assert len(hsp.aln_annotation["similarity"]) == len(parsed["hit"]["seq"]) + + # query and hit sequence types must be the same + assert parsed["query"]["_type"] == parsed["hit"]["_type"] + type_val = parsed["query"]["_type"] # hit works fine too + molecule_type = "DNA" if type_val == "D" else "protein" + setattr(hsp.fragment, "molecule_type", molecule_type) + + for seq_type in ("hit", "query"): + # get and set start and end coordinates + start = int(parsed[seq_type]["_start"]) + end = int(parsed[seq_type]["_stop"]) + + setattr(hsp.fragment, seq_type + "_start", min(start, end) - 1) + setattr(hsp.fragment, seq_type + "_end", max(start, end)) + # set seq and molecule type + setattr(hsp.fragment, seq_type, 
parsed[seq_type]["seq"])
+
+        if molecule_type != "protein":
+            # get strand from coordinate; start <= end is plus
+            # start > end is minus
+            if start <= end:
+                setattr(hsp.fragment, seq_type + "_strand", 1)
+            else:
+                setattr(hsp.fragment, seq_type + "_strand", -1)
+        else:
+            setattr(hsp.fragment, seq_type + "_strand", 0)
+
+
+def _get_aln_slice_coords(parsed_hsp):
+    """Get the slice coordinates of the aligned HSP sequence (PRIVATE).
+
+    To get the actual pairwise alignment sequences, we must first
+    translate the un-gapped sequence based coordinates into positions
+    in the gapped sequence (which may have a flanking region shown
+    using leading - characters). To date, I have never seen any
+    trailing flanking region shown in the m10 file, but the
+    following code should also cope with that.
+
+    Note that this code seems to work fine even when the "sq_offset"
+    entries are present as a result of using the -X command line option.
+    """
+    seq = parsed_hsp["seq"]
+    seq_stripped = seq.strip("-")
+    disp_start = int(parsed_hsp["_display_start"])
+    start = int(parsed_hsp["_start"])
+    stop = int(parsed_hsp["_stop"])
+
+    if start <= stop:
+        start = start - disp_start
+        stop = stop - disp_start + 1
+    else:
+        start = disp_start - start
+        stop = disp_start - stop + 1
+    stop += seq_stripped.count("-")
+    if not (0 <= start and start < stop and stop <= len(seq_stripped)):
+        raise ValueError(
+            "Problem with sequence start/stop,\n%s[%i:%i]\n%s"
+            % (seq, start, stop, parsed_hsp)
+        )
+    return start, stop
+
+
+class FastaM10Parser:
+    """Parser for Bill Pearson's FASTA suite's -m 10 output."""
+
+    def __init__(self, handle, __parse_hit_table=False):
+        """Initialize the class."""
+        self.handle = handle
+        self._preamble = self._parse_preamble()
+
+    def __iter__(self):
+        """Iterate over FastaM10Parser object, yielding query results."""
+        for qresult in self._parse_qresult():
+            # re-set desc, for hsp query description
+            qresult.description = qresult.description
+            yield qresult
+
+    def _parse_preamble(self):
+        """Parse the Fasta preamble for Fasta flavor and version (PRIVATE)."""
+        preamble = {}
+        while True:
+            line = self.handle.readline()
+            # this should be the line just before the first qresult
+            if line.startswith("Query"):
+                break
+            # try to match for version line
+            elif line.startswith(" version"):
+                preamble["version"] = line.split(" ")[2]
+            else:
+                # try to match for flavor line
+                flav_match = re.match(_RE_FLAVS, line.lower())
+                if flav_match:
+                    preamble["program"] = flav_match.group(0)
+        self.line = line
+
+        return preamble
+
+    def __parse_hit_table(self):
+        """Parse hit table rows (PRIVATE)."""
+        # collect hit table rows until we see an empty line
+        hit_rows = []
+        while True:
+            line = self.handle.readline()
+            if not line or not line.strip():
+                break
+            hit_rows.append(line.strip())
+        self.line = line
+        return hit_rows
+
+    def _parse_qresult(self):
+        """Parse query result (PRIVATE)."""
+        # initial qresult value
+        qresult = None
+        hit_rows = []
+        # state values
+        state_QRES_NEW = 1
+        state_QRES_HITTAB = 3
+        state_QRES_CONTENT = 5
+        state_QRES_END = 7
+
+        line = self.line
+
+        while True:
+
+            # one line before the hit table
+            if line.startswith("The best scores are:"):
+                qres_state = state_QRES_HITTAB
+            # the end of a query or the file altogether
+            elif line.strip() == ">>>///" or not line:
+                qres_state = state_QRES_END
+            # the beginning of a new query
+            elif not line.startswith(">>>") and ">>>" in line:
+                qres_state = state_QRES_NEW
+            # the beginning of the query info and its hits + hsps
+            elif line.startswith(">>>") and not line.strip() == ">>><<<":
+                qres_state = 
state_QRES_CONTENT + # default qres mark + else: + qres_state = None + + if qres_state is not None: + if qres_state == state_QRES_HITTAB: + # parse hit table if flag is set + hit_rows = self.__parse_hit_table() + line = self.handle.readline() + + elif qres_state == state_QRES_END: + yield _set_qresult_hits(qresult, hit_rows) + break + + elif qres_state == state_QRES_NEW: + # if qresult is filled, yield it first + if qresult is not None: + yield _set_qresult_hits(qresult, hit_rows) + regx = re.search(_RE_ID_DESC_SEQLEN, line) + query_id = regx.group(1) + seq_len = regx.group(3) + desc = regx.group(2) + qresult = QueryResult(id=query_id) + qresult.seq_len = int(seq_len) + # get target from the next line + line = self.handle.readline() + qresult.target = [x for x in line.split(" ") if x][1].strip() + if desc is not None: + qresult.description = desc + # set values from preamble + for key, value in self._preamble.items(): + setattr(qresult, key, value) + line = self.handle.readline() + + elif qres_state == state_QRES_CONTENT: + assert line[3:].startswith(qresult.id), line + for hit, strand in self._parse_hit(query_id): + # HACK: re-set desc, for hsp hit and query description + hit.description = hit.description + hit.query_description = qresult.description + # if hit is not in qresult, append it + if hit.id not in qresult: + qresult.append(hit) + # otherwise, it might be the same hit with a different strand + else: + # make sure strand is different and then append hsp to + # existing hit + for hsp in hit.hsps: + assert strand != hsp.query_strand + qresult[hit.id].append(hsp) + line = self.line + + else: + line = self.handle.readline() + + self.line = line + + def _parse_hit(self, query_id): + """Parse hit on query identifier (PRIVATE).""" + while True: + line = self.handle.readline() + if line.startswith(">>"): + break + + state = _STATE_NONE + strand = None + hsp_list = [] + hsp = None + parsed_hsp = None + hit_desc = None + seq_len = None + while True: + # yield hit if we've reached the start of a new query or + # the end of the search + self.line = self.handle.readline() + if self.line.strip() in [">>><<<", ">>>///"] or ( + not self.line.startswith(">>>") and ">>>" in self.line + ): + # append last parsed_hsp['hit']['seq'] line + if state == _STATE_HIT_BLOCK: + parsed_hsp["hit"]["seq"] += line.strip() + elif state == _STATE_CONS_BLOCK: + hsp.aln_annotation["similarity"] += line.strip("\r\n") + # process HSP alignment and coordinates + _set_hsp_seqs(hsp, parsed_hsp, self._preamble["program"]) + hit = Hit(hsp_list) + hit.description = hit_desc + hit.seq_len = seq_len + yield hit, strand + hsp_list = [] + break + # yield hit and create a new one if we're still in the same query + elif line.startswith(">>"): + # try yielding, if we have hsps + if hsp_list: + _set_hsp_seqs(hsp, parsed_hsp, self._preamble["program"]) + hit = Hit(hsp_list) + hit.description = hit_desc + hit.seq_len = seq_len + yield hit, strand + hsp_list = [] + # try to get the hit id and desc, and handle cases without descs + try: + hit_id, hit_desc = line[2:].strip().split(" ", 1) + except ValueError: + hit_id = line[2:].strip().split(" ", 1)[0] + hit_desc = "" + # create the HSP object for Hit + frag = HSPFragment(hit_id, query_id) + hsp = HSP([frag]) + hsp_list.append(hsp) + # set or reset the state to none + state = _STATE_NONE + parsed_hsp = {"query": {}, "hit": {}} + # create and append a new HSP if line starts with '>--' + elif line.startswith(">--"): + # set seq attributes of previous hsp + _set_hsp_seqs(hsp, parsed_hsp, 
self._preamble["program"]) + # and create a new one + frag = HSPFragment(hit_id, query_id) + hsp = HSP([frag]) + hsp_list.append(hsp) + # set the state ~ none yet + state = _STATE_NONE + parsed_hsp = {"query": {}, "hit": {}} + # this is either query or hit data in the HSP, depending on the state + elif line.startswith(">"): + if state == _STATE_NONE: + # make sure it's the correct query + if not query_id.startswith(line[1:].split(" ")[0]): + raise ValueError("%r vs %r" % (query_id, line)) + state = _STATE_QUERY_BLOCK + parsed_hsp["query"]["seq"] = "" + elif state == _STATE_QUERY_BLOCK: + # make sure it's the correct hit + assert hit_id.startswith(line[1:].split(" ")[0]) + state = _STATE_HIT_BLOCK + parsed_hsp["hit"]["seq"] = "" + # check for conservation block + elif line.startswith("; al_cons"): + state = _STATE_CONS_BLOCK + hsp.fragment.aln_annotation["similarity"] = "" + elif line.startswith(";"): + # Fasta outputs do not make a clear distinction between Hit + # and HSPs, so we check the attribute names to determine + # whether it belongs to a Hit or HSP + regx = re.search(_RE_ATTR, line.strip()) + name = regx.group(1) + value = regx.group(2) + + # for values before the '>...' query block + if state == _STATE_NONE: + if name in _HSP_ATTR_MAP: + attr_name, caster = _HSP_ATTR_MAP[name] + if caster is not str: + value = caster(value) + if name in ["_ident", "_sim"]: + value *= 100 + setattr(hsp, attr_name, value) + # otherwise, pool the values for processing later + elif state == _STATE_QUERY_BLOCK: + parsed_hsp["query"][name] = value + elif state == _STATE_HIT_BLOCK: + if name == "_len": + seq_len = int(value) + else: + parsed_hsp["hit"][name] = value + # for values in the hit block + else: + raise ValueError("Unexpected line: %r" % line) + # otherwise, it must be lines containing the sequences + else: + assert ">" not in line + # if we're in hit, parse into hsp.hit + if state == _STATE_HIT_BLOCK: + parsed_hsp["hit"]["seq"] += line.strip() + elif state == _STATE_QUERY_BLOCK: + parsed_hsp["query"]["seq"] += line.strip() + elif state == _STATE_CONS_BLOCK: + hsp.fragment.aln_annotation["similarity"] += line.strip("\r\n") + # we should not get here! 
+ else: + raise ValueError("Unexpected line: %r" % line) + line = self.line + + +class FastaM10Indexer(SearchIndexer): + """Indexer class for Bill Pearson's FASTA suite's -m 10 output.""" + + _parser = FastaM10Parser + + def __init__(self, filename): + """Initialize the class.""" + SearchIndexer.__init__(self, filename) + + def __iter__(self): + """Iterate over FastaM10Indexer; yields query results' keys, start offsets, offset lengths.""" + handle = self._handle + handle.seek(0) + start_offset = handle.tell() + qresult_key = None + query_mark = b">>>" + + line = handle.readline() + while True: + end_offset = handle.tell() + + if not line.startswith(query_mark) and query_mark in line: + regx = re.search(_RE_ID_DESC_SEQLEN_IDX, line) + qresult_key = regx.group(1).decode() + start_offset = end_offset - len(line) + # yield whenever we encounter a new query or at the end of the file + if qresult_key is not None: + if not line: + yield qresult_key, start_offset, end_offset - start_offset + break + line = handle.readline() + if not line.startswith(query_mark) and query_mark in line: + yield qresult_key, start_offset, end_offset - start_offset + start_offset = end_offset + else: + line = handle.readline() + + def get_raw(self, offset): + """Return the raw record from the file as a bytes string.""" + handle = self._handle + qresult_raw = b"" + query_mark = b">>>" + + # read header first + handle.seek(0) + line = handle.readline() + while True: + qresult_raw += line + line = handle.readline() + if not line.startswith(query_mark) and query_mark in line: + break + + # and read the qresult raw string + handle.seek(offset) + line = handle.readline() + while True: + # preserve whitespace, don't use read_forward + if not line: + break + qresult_raw += line + + line = handle.readline() + # break when we've reached qresult end + if not line.startswith(query_mark) and query_mark in line: + break + + # append mock end marker to qresult_raw, since it's not always present + return qresult_raw + b">>><<<\n" + + +# if not used as a module, run the doctest +if __name__ == "__main__": + from Bio._utils import run_doctest + + run_doctest() diff --git a/code/lib/Bio/SearchIO/HHsuiteIO/__init__.py b/code/lib/Bio/SearchIO/HHsuiteIO/__init__.py new file mode 100644 index 0000000..faf2ce3 --- /dev/null +++ b/code/lib/Bio/SearchIO/HHsuiteIO/__init__.py @@ -0,0 +1,17 @@ +# Copyright 2019 by Jens Thomas. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +"""Bio.SearchIO support for HHSUITE output formats. + +This module adds support for parsing HHSUITE version 2 output. 
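+
+A minimal usage sketch (``result.hhr`` is only a placeholder name for an
+HHsuite plain text output file; ``hhsuite2-text`` is the format name this
+module's parser is registered under in Bio.SearchIO)::
+
+    from Bio import SearchIO
+
+    qresult = SearchIO.read("result.hhr", "hhsuite2-text")
+    for hit in qresult:
+        print(hit.id, hit.evalue)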
+
+More information about HHSUITE is available through these links:
+- Github repository: https://github.com/soedinglab/hh-suite
+- Wiki: https://github.com/soedinglab/hh-suite/wiki
+
+"""
+
+from .hhsuite2_text import Hhsuite2TextParser
diff --git a/code/lib/Bio/SearchIO/HHsuiteIO/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/SearchIO/HHsuiteIO/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000..77e9d8c
Binary files /dev/null and b/code/lib/Bio/SearchIO/HHsuiteIO/__pycache__/__init__.cpython-37.pyc differ
diff --git a/code/lib/Bio/SearchIO/HHsuiteIO/__pycache__/hhsuite2_text.cpython-37.pyc b/code/lib/Bio/SearchIO/HHsuiteIO/__pycache__/hhsuite2_text.cpython-37.pyc
new file mode 100644
index 0000000..8effd2d
Binary files /dev/null and b/code/lib/Bio/SearchIO/HHsuiteIO/__pycache__/hhsuite2_text.cpython-37.pyc differ
diff --git a/code/lib/Bio/SearchIO/HHsuiteIO/hhsuite2_text.py b/code/lib/Bio/SearchIO/HHsuiteIO/hhsuite2_text.py
new file mode 100644
index 0000000..2335620
--- /dev/null
+++ b/code/lib/Bio/SearchIO/HHsuiteIO/hhsuite2_text.py
@@ -0,0 +1,234 @@
+# Copyright 2019 by Jens Thomas. All rights reserved.
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Bio.SearchIO parser for HHSUITE version 2 and 3 plain text output format."""
+
+import re
+from collections import OrderedDict
+import warnings
+
+from Bio.SearchIO._utils import read_forward
+from Bio.SearchIO._model import QueryResult, Hit, HSP, HSPFragment
+
+__all__ = ("Hhsuite2TextParser",)
+
+# precompile regex patterns for faster processing
+# regex for query name capture
+_RE_QUERY = re.compile(r"^Query\s+(.+)\s?$")
+
+# regex for the line starting a hit block, e.g. 'No 1'
+_RE_HIT_BLOCK_START = re.compile(r"^No +(\d+)\s+$")
+
+# id and full description
+_RE_HIT_BLOCK_DESC = re.compile(r">(\S+)\s+(.*)$")
+
+# sequence alignment line
+# Q sp|Q9BSU1|CP07 229 DAKMRVFERSVYFGDSCQDVLSMLGSPHKV 258 (422)
+_RE_MATCH_BLOCK_QUERY_SEQ = re.compile(r"^Q\s+(.+) +(\d+) +([A-Z-]+) +(\d+) +\(\d+\)$")
+_RE_MATCH_BLOCK_HIT_SEQ = re.compile(r"^T\s+(.+) +(\d+) +([A-Z-]+) +(\d+) +\(\d+\)$")
+
+_END_OF_FILE_MARKER = "Done!"
+
+_PROGRAM = "HHSUITE"
+
+# Maximum number of lines to read before expecting a hit block.
+# This determines the maximum number of hits that would be allowed in
+# the initial hit table.
+MAX_READ_UNTIL = 5000
+
+
+class Hhsuite2TextParser:
+    """Parser for the HHSUITE version 2 and 3 text output."""
+
+    def __init__(self, handle):
+        """Initialize the class."""
+        self.handle = handle
+        self.line = read_forward(self.handle)
+        self.done = False
+        self.query_id = None
+        self.seq_len = None
+
+    def __iter__(self):
+        """Iterate over query results - there will only ever be one."""
+        yield from self._parse_qresult()
+
+    def _read_until(self, bool_func, stop_on_blank=True, max_read_until=MAX_READ_UNTIL):
+        """Read the file handle until the given function returns True (PRIVATE)."""
+        count = 0
+        while True:
+            if stop_on_blank and not self.line:
+                return
+            if bool_func(self.line):
+                return
+            else:
+                self.line = read_forward(self.handle)
+            count += 1
+            if count >= max_read_until:
+                raise RuntimeError("Exceeded max_read_until in _read_until")
+
+    def _parse_qresult(self):
+        """Parse HHSUITE output file (PRIVATE)."""
+        hit_block_data = []
+        self._parse_preamble()
+        self._read_until(
+            lambda line: re.search(_RE_HIT_BLOCK_START, line), stop_on_blank=False
+        )
+        while not self.done:
+            hit_dict = self._parse_hit_block()
+            hit_block_data.append(hit_dict)
+        return self._create_qresult(hit_block_data)
+
+    def _parse_preamble(self):
+        """Parse metadata about query (PRIVATE)."""
+        meta = {}
+        while self.line:
+            regx = re.search(_RE_QUERY, self.line)
+            if regx:
+                self.query_id = regx.group(1)
+            if self.line.startswith("Match_columns"):
+                self.seq_len = int(self.line.strip().split()[1])
+            self.line = self.handle.readline().strip()
+        return meta
+
+    def _parse_hit_block(self):
+        """Parse a hit block (PRIVATE)."""
+        self.line = read_forward(self.handle)
+        match = re.search(_RE_HIT_BLOCK_DESC, self.line)
+        if not match:
+            raise RuntimeError(
+                f"Unexpected content in HIT_BLOCK_DESC line: '{self.line}'"
+            )
+        hit_data = {
+            "hit_id": match.group(1),
+            "description": match.group(2).lstrip(" ;"),
+            "evalue": None,
+            "hit_start": None,
+            "hit_end": None,
+            "hit_seq": "",
+            "prob": None,
+            "query_start": None,
+            "query_end": None,
+            "query_seq": "",
+            "score": None,
+        }
+        self.line = self.handle.readline()
+        self._process_score_line(self.line, hit_data)
+        while True:
+            self.line = read_forward(self.handle)
+            if not self.line.strip() or self.line.startswith(_END_OF_FILE_MARKER):
+                # _END_OF_FILE_MARKER isn't always present
+                self.done = True
+                return hit_data
+            elif re.search(_RE_HIT_BLOCK_START, self.line):
+                return hit_data
+            else:
+                self._parse_hit_match_block(hit_data)
+
+    @staticmethod
+    def _process_score_line(line, hit_data):
+        """Parse the scores from the line and populate the hit_data dict (PRIVATE).
+
+        Lines are of the form:
+        Probab=99.95 E-value=3.7e-34 Score=210.31 Aligned_cols=171 Identities=100% Similarity=2.050 Sum_probs=166.9
+
+        E-value could be in decimal or scientific notation, so split the string
+        rather than use a regexp - this also means we should be tolerant of
+        additional fields being added/removed
+        """
+        score_map = {"E-value": "evalue", "Score": "score", "Probab": "prob"}
+        for score_pair in line.strip().split():
+            key, value = score_pair.split("=")
+            if key in score_map:
+                try:
+                    hit_data[score_map[key]] = float(value)
+                except ValueError:
+                    # We trigger warnings here as it's not a big enough problem
+                    # to crash, but it indicates something unexpected.
+                    warnings.warn(
+                        f"HHsuite parser: unable to extract {key} from line: {line}"
+                    )
+
+    def _parse_hit_match_block(self, hit_match_data):
+        """Parse a single block of hit sequence data (PRIVATE).
+
+        Parses a block such as ::
+
+            Q ss_pred             ceecchHHHHHHHHHHHHHHHHHHHhhhhhcCCCCccc
+            Q 4P79:A|PDBID|C  160 YELGPALYLGWSASLLSILGGICVFSTAAASSKEEPAT  197 (198)
+            Q Consensus       160 ~~~g~sf~l~~~~~~l~~~~~~l~~~~~~~~~~~~~~~  197 (198)
+                                  .++|||||++|++.++.+++++++++..+..++++..+
+            T Consensus       327 ~~~GwS~~l~~~s~~l~lia~~l~~~~~~~~~~~~~~~  364 (364)
+            T 5B2G_A          327 REMGASLYVGWAASGLLLLGGGLLCCSGPSSGENLYFQ  364 (364)
+            T ss_dssp          EEECTHHHHHHHHHHHHHHHHHHHHCC-----------
+            T ss_pred          cccchHHHHHHHHHHHHHHHHHHHHhcCCCCCCccccC
+
+        """
+
+        def match_is_valid(match):
+            """Return True if match is not a Consensus line (PRIVATE).
+
+            It's not possible to distinguish a sequence line from a Consensus
+            line with a regexp, so we need to check the ID column.
+            """
+            return match.group(1).strip() != "Consensus"
+
+        while True:
+            if not self.line.strip():  # blank lines indicate the end of a hit block
+                return
+            match = re.match(_RE_MATCH_BLOCK_QUERY_SEQ, self.line)
+            if match and match_is_valid(match):
+                hit_match_data["query_seq"] += match.group(3).strip()
+                if hit_match_data["query_start"] is None:
+                    hit_match_data["query_start"] = int(match.group(2))
+                hit_match_data["query_end"] = int(match.group(4))
+            else:
+                match = re.match(_RE_MATCH_BLOCK_HIT_SEQ, self.line)
+                if match and match_is_valid(match):
+                    hit_match_data["hit_seq"] += match.group(3).strip()
+                    if hit_match_data["hit_start"] is None:
+                        hit_match_data["hit_start"] = int(match.group(2))
+                    hit_match_data["hit_end"] = int(match.group(4))
+            self.line = self.handle.readline()
+
+    def _create_qresult(self, hit_blocks):
+        """Create the Biopython data structures from the parsed data (PRIVATE)."""
+        query_id = self.query_id
+        hit_dict = OrderedDict()
+
+        for output_index, block in enumerate(hit_blocks):
+            hit_id = block["hit_id"]
+
+            frag = HSPFragment(hit_id, query_id)
+            frag.molecule_type = "protein"
+            frag.query_start = block["query_start"] - 1
+            frag.query_end = block["query_end"]
+            frag.hit_start = block["hit_start"] - 1
+            frag.hit_end = block["hit_end"]
+            frag.hit = block["hit_seq"]
+            frag.query = block["query_seq"]
+
+            hsp = HSP([frag])
+            hsp.hit_id = hit_id
+            hsp.output_index = output_index
+            hsp.query_id = query_id
+            hsp.hit_description = block["description"]
+            is_included = True  # Should everything be included?
+            hsp.is_included = is_included
+            hsp.evalue = block["evalue"]
+            hsp.score = block["score"]
+            hsp.prob = block["prob"]
+
+            if hit_id not in hit_dict:
+                hit = Hit([hsp], hit_id)
+                hit.description = block["description"]
+                hit.is_included = is_included
+                hit.evalue = block["evalue"]
+                hit.score = block["score"]
+                hit_dict[hit_id] = hit
+            else:
+                hit_dict[hit_id].append(hsp)
+
+        qresult = QueryResult(hit_dict.values(), query_id)
+        qresult.program = _PROGRAM
+        qresult.seq_len = self.seq_len
+        return [qresult]
diff --git a/code/lib/Bio/SearchIO/HmmerIO/__init__.py b/code/lib/Bio/SearchIO/HmmerIO/__init__.py
new file mode 100644
index 0000000..c243007
--- /dev/null
+++ b/code/lib/Bio/SearchIO/HmmerIO/__init__.py
@@ -0,0 +1,304 @@
+# Copyright 2012 by Wibowo Arindrarto. All rights reserved.
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Bio.SearchIO support for HMMER output formats.
+
+This module adds support for parsing HMMER outputs. HMMER is a suite of
+programs implementing profile hidden Markov models to find similarity
+across protein sequences.
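+
+For instance, a minimal parsing sketch (``hmmer_output.txt`` is only a
+placeholder name for a plain text output file; the supported format names
+are listed below)::
+
+    from Bio import SearchIO
+
+    for qresult in SearchIO.parse("hmmer_output.txt", "hmmer3-text"):
+        for hit in qresult:
+            print(qresult.id, hit.id, hit.evalue)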
+
+Bio.SearchIO.HmmerIO was tested on the following HMMER versions and flavors:
+
+ - HMMER3 flavors: hmmscan, hmmsearch, phmmer
+ - HMMER2 flavors: hmmpfam, hmmsearch
+
+More information on HMMER is available through these links:
+ - Web page: http://hmmer.janelia.org/
+ - User guide: ftp://selab.janelia.org/pub/software/hmmer3/3.0/Userguide.pdf
+
+
+Supported formats
+=================
+
+Bio.SearchIO.HmmerIO supports the following HMMER output formats:
+
+ - Plain text, v3.0 - 'hmmer3-text' - parsing, indexing
+ - Table, v3.0 - 'hmmer3-tab' - parsing, indexing, writing
+ - Domain table, v3.0 - 'hmmer3-domtab'* - parsing, indexing, writing
+ - Plain text, v2.x - 'hmmer2-text' - parsing, indexing
+
+* For the domain table output, due to the way HMMER outputs the sequence
+  coordinates, you have to specify which HMMER flavor produced the output as
+  the file format. So instead of using 'hmmer3-domtab', you have to use
+  either 'hmmscan3-domtab', 'hmmsearch3-domtab', or 'phmmer3-domtab' as the
+  file format name.
+
+Note that for all output formats, HMMER uses its own convention for input and
+output coordinates. It does not use the terms 'hit' or 'query'; instead it
+uses 'hmm' or 'ali'. For example, 'hmmfrom' is the start coordinate of the HMM
+sequence, while 'alifrom' is the start coordinate of the protein sequence.
+
+HmmerIO is aware of this different naming scheme and will adjust these names
+accordingly to fit SearchIO's object model. If HmmerIO sees that the output
+file to parse was written by hmmsearch or phmmer, all 'hmm' coordinates will
+be the hit coordinates and 'ali' coordinates will be the query coordinates.
+Conversely, if the HMMER flavor is hmmscan, 'hmm' will be query and 'ali'
+will be hit.
+
+This is why the 'hmmer3-domtab' format has to be specified with the source
+HMMER flavor: the parsers need to know which is the hit and which is the
+query. 'hmmer3-text' has its source program information present in the file,
+while 'hmmer3-tab' does not output any coordinates. That's why neither of
+these formats needs a direct flavor specification like 'hmmer3-domtab'.
+
+Also note that the domain table format writers use HMMER's naming convention
+('hmm' and 'ali'), so the files you write will be similar to files written
+by a real HMMER program.
+
+
+hmmer2-text and hmmer3-text
+===========================
+
+The parser for HMMER 3.0 plain text output can parse output files with
+alignment blocks (default) or without (with the '--noali' flag). If the
+alignment blocks are present, you can also parse files with variable
+alignment width (using the '--notextw' or '--textw' flag).
+
+The following SearchIO object attributes are provided. Rows marked with '*'
+denote attributes not available in the hmmer2-text format:
+
++-----------------+-------------------------+----------------------------------+
+| Object          | Attribute               | Value                            |
++=================+=========================+==================================+
+| QueryResult     | accession               | accession (if present)           |
+|                 +-------------------------+----------------------------------+
+|                 | description             | query sequence description       |
+|                 +-------------------------+----------------------------------+
+|                 | id                      | query sequence ID                |
+|                 +-------------------------+----------------------------------+
+|                 | program                 | HMMER flavor                     |
+|                 +-------------------------+----------------------------------+
+|                 | seq_len*                | full length of query sequence    |
+|                 +-------------------------+----------------------------------+
+|                 | target                  | target search database           |
+|                 +-------------------------+----------------------------------+
+|                 | version                 | HMMER version                    |
++-----------------+-------------------------+----------------------------------+
+| Hit             | bias*                   | hit-level bias                   |
+|                 +-------------------------+----------------------------------+
+|                 | bitscore                | hit-level score                  |
+|                 +-------------------------+----------------------------------+
+|                 | description             | hit sequence description         |
+|                 +-------------------------+----------------------------------+
+|                 | domain_exp_num*         | expected number of domains in    |
+|                 |                         | the hit (exp column)             |
+|                 +-------------------------+----------------------------------+
+|                 | domain_obs_num          | observed number of domains in    |
+|                 |                         | the hit (N column)               |
+|                 +-------------------------+----------------------------------+
+|                 | evalue                  | hit-level e-value                |
+|                 +-------------------------+----------------------------------+
+|                 | id                      | hit sequence ID                  |
+|                 +-------------------------+----------------------------------+
+|                 | is_included*            | boolean, whether the hit is in   |
+|                 |                         | the inclusion threshold or not   |
++-----------------+-------------------------+----------------------------------+
+| HSP             | acc_avg*                | expected accuracy per alignment  |
+|                 |                         | residue (acc column)             |
+|                 +-------------------------+----------------------------------+
+|                 | bias*                   | hsp-level bias                   |
+|                 +-------------------------+----------------------------------+
+|                 | bitscore                | hsp-level score                  |
+|                 +-------------------------+----------------------------------+
+|                 | domain_index            | the domain index set by HMMER    |
+|                 +-------------------------+----------------------------------+
+|                 | env_end*                | end coordinate of the envelope   |
+|                 +-------------------------+----------------------------------+
+|                 | env_endtype*            | envelope end types (e.g. '[]',   |
+|                 |                         | '..', '[.', etc.)               
| +| +-------------------------+----------------------------------+ +| | env_start* | start coordinate of the envelope | +| +-------------------------+----------------------------------+ +| | evalue | hsp-level independent e-value | +| +-------------------------+----------------------------------+ +| | evalue_cond* | hsp-level conditional e-value | +| +-------------------------+----------------------------------+ +| | hit_endtype | hit sequence end types | +| +-------------------------+----------------------------------+ +| | is_included* | boolean, whether the hit of the | +| | | hsp is in the inclusion | +| | | threshold | +| +-------------------------+----------------------------------+ +| | query_endtype | query sequence end types | ++-----------------+-------------------------+----------------------------------+ +| HSPFragment | aln_annotation | alignment similarity string and | +| (also via HSP) | | other annotations (e.g. PP, CS) | +| +-------------------------+----------------------------------+ +| | aln_span | length of alignment fragment | +| +-------------------------+----------------------------------+ +| | hit | hit sequence | +| +-------------------------+----------------------------------+ +| | hit_end | hit sequence end coordinate, may | +| | | be 'hmmto' or 'alito' depending | +| | | on the HMMER flavor | +| +-------------------------+----------------------------------+ +| | hit_start | hit sequence start coordinate, | +| | | may be 'hmmfrom' or 'alifrom' | +| | | depending on the HMMER flavor | +| +-------------------------+----------------------------------+ +| | hit_strand | hit sequence strand | +| +-------------------------+----------------------------------+ +| | query | query sequence | +| +-------------------------+----------------------------------+ +| | query_end | query sequence end coordinate, | +| | | may be 'hmmto' or 'alito' | +| | | depending on the HMMER flavor | +| +-------------------------+----------------------------------+ +| | query_start | query sequence start coordinate, | +| | | may be 'hmmfrom' or 'alifrom' | +| | | depending on the HMMER flavor | +| +-------------------------+----------------------------------+ +| | query_strand | query sequence strand | ++-----------------+-------------------------+----------------------------------+ + + +hmmer3-tab +========== +The following SearchIO objects attributes are provided: + ++-----------------+-------------------------+----------------------------------+ +| Object | Attribute | Column / Value | ++=================+=========================+==================================+ +| QueryResult | accession | query accession (if present) | +| +-------------------------+----------------------------------+ +| | description | query sequence description | +| +-------------------------+----------------------------------+ +| | id | query name | ++-----------------+-------------------------+----------------------------------+ +| Hit | accession | hit accession | +| +-------------------------+----------------------------------+ +| | bias | hit-level bias | +| +-------------------------+----------------------------------+ +| | bitscore | hit-level score | +| +-------------------------+----------------------------------+ +| | description | hit sequence description | +| +-------------------------+----------------------------------+ +| | cluster_num | clu column | +| +-------------------------+----------------------------------+ +| | domain_exp_num | exp column | +| 
+-------------------------+----------------------------------+ +| | domain_included_num | inc column | +| +-------------------------+----------------------------------+ +| | domain_obs_num | dom column | +| +-------------------------+----------------------------------+ +| | domain_reported_num | rep column | +| +-------------------------+----------------------------------+ +| | env_num | env column | +| +-------------------------+----------------------------------+ +| | evalue | hit-level evalue | +| +-------------------------+----------------------------------+ +| | id | target name | +| +-------------------------+----------------------------------+ +| | overlap_num | ov column | +| +-------------------------+----------------------------------+ +| | region_num | reg column | ++-----------------+-------------------------+----------------------------------+ +| HSP | bias | bias of the best domain | +| +-------------------------+----------------------------------+ +| | bitscore | bitscore of the best domain | +| +-------------------------+----------------------------------+ +| | evalue | evalue of the best domain | ++-----------------+-------------------------+----------------------------------+ + + +hmmer3-domtab +============= +To parse domain table files, you must use the HMMER flavor that produced the +file. So instead of using 'hmmer3-domtab', use either 'hmmsearch3-domtab', +'hmmscan3-domtab', or 'phmmer3-domtab'. + +The following SearchIO objects attributes are provided: + ++-----------------+-------------------------+----------------------------------+ +| Object | Attribute | Value | ++=================+=========================+==================================+ +| QueryResult | accession | accession | +| +-------------------------+----------------------------------+ +| | description | query sequence description | +| +-------------------------+----------------------------------+ +| | id | query sequence ID | +| +-------------------------+----------------------------------+ +| | seq_len | full length of query sequence | ++-----------------+-------------------------+----------------------------------+ +| Hit | accession | accession | +| +-------------------------+----------------------------------+ +| | bias | hit-level bias | +| +-------------------------+----------------------------------+ +| | bitscore | hit-level score | +| +-------------------------+----------------------------------+ +| | description | hit sequence description | +| +-------------------------+----------------------------------+ +| | evalue | hit-level e-value | +| +-------------------------+----------------------------------+ +| | id | hit sequence ID | +| +-------------------------+----------------------------------+ +| | seq_len | length of hit sequence or HMM | ++-----------------+-------------------------+----------------------------------+ +| HSP | acc_avg | expected accuracy per alignment | +| | | residue (acc column) | +| +-------------------------+----------------------------------+ +| | bias | hsp-level bias | +| +-------------------------+----------------------------------+ +| | bitscore | hsp-level score | +| +-------------------------+----------------------------------+ +| | domain_index | the domain index set by HMMER | +| +-------------------------+----------------------------------+ +| | env_end | end coordinate of the envelope | +| +-------------------------+----------------------------------+ +| | env_start | start coordinate of the envelope | +| 
+-------------------------+----------------------------------+ +| | evalue | hsp-level independent e-value | +| +-------------------------+----------------------------------+ +| | evalue_cond | hsp-level conditional e-value | ++-----------------+-------------------------+----------------------------------+ +| HSPFragment | hit_end | hit sequence end coordinate, may | +| (also via HSP) | | be 'hmmto' or 'alito' depending | +| | | on the HMMER flavor | +| +-------------------------+----------------------------------+ +| | hit_start | hit sequence start coordinate, | +| | | may be 'hmmfrom' or 'alifrom' | +| | | depending on the HMMER flavor | +| +-------------------------+----------------------------------+ +| | hit_strand | hit sequence strand | +| +-------------------------+----------------------------------+ +| | query_end | query sequence end coordinate, | +| | | may be 'hmmto' or 'alito' | +| | | depending on the HMMER flavor | +| +-------------------------+----------------------------------+ +| | query_start | query sequence start coordinate, | +| | | may be 'hmmfrom' or 'alifrom' | +| | | depending on the HMMER flavor | +| +-------------------------+----------------------------------+ +| | query_strand | query sequence strand | ++-----------------+-------------------------+----------------------------------+ + +""" + +from .hmmer2_text import Hmmer2TextParser, Hmmer2TextIndexer +from .hmmer3_domtab import ( + Hmmer3DomtabParser, + Hmmer3DomtabHmmhitParser, + Hmmer3DomtabHmmqueryParser, +) +from .hmmer3_domtab import Hmmer3DomtabHmmhitIndexer, Hmmer3DomtabHmmqueryIndexer +from .hmmer3_domtab import Hmmer3DomtabHmmhitWriter, Hmmer3DomtabHmmqueryWriter +from .hmmer3_text import Hmmer3TextParser, Hmmer3TextIndexer +from .hmmer3_tab import Hmmer3TabParser, Hmmer3TabIndexer, Hmmer3TabWriter + + +# if not used as a module, run the doctest +if __name__ == "__main__": + from Bio._utils import run_doctest + + run_doctest() diff --git a/code/lib/Bio/SearchIO/HmmerIO/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/SearchIO/HmmerIO/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..9eef1d6 Binary files /dev/null and b/code/lib/Bio/SearchIO/HmmerIO/__pycache__/__init__.cpython-37.pyc differ diff --git a/code/lib/Bio/SearchIO/HmmerIO/__pycache__/_base.cpython-37.pyc b/code/lib/Bio/SearchIO/HmmerIO/__pycache__/_base.cpython-37.pyc new file mode 100644 index 0000000..4c49c25 Binary files /dev/null and b/code/lib/Bio/SearchIO/HmmerIO/__pycache__/_base.cpython-37.pyc differ diff --git a/code/lib/Bio/SearchIO/HmmerIO/__pycache__/hmmer2_text.cpython-37.pyc b/code/lib/Bio/SearchIO/HmmerIO/__pycache__/hmmer2_text.cpython-37.pyc new file mode 100644 index 0000000..e375cb5 Binary files /dev/null and b/code/lib/Bio/SearchIO/HmmerIO/__pycache__/hmmer2_text.cpython-37.pyc differ diff --git a/code/lib/Bio/SearchIO/HmmerIO/__pycache__/hmmer3_domtab.cpython-37.pyc b/code/lib/Bio/SearchIO/HmmerIO/__pycache__/hmmer3_domtab.cpython-37.pyc new file mode 100644 index 0000000..0b4dcf2 Binary files /dev/null and b/code/lib/Bio/SearchIO/HmmerIO/__pycache__/hmmer3_domtab.cpython-37.pyc differ diff --git a/code/lib/Bio/SearchIO/HmmerIO/__pycache__/hmmer3_tab.cpython-37.pyc b/code/lib/Bio/SearchIO/HmmerIO/__pycache__/hmmer3_tab.cpython-37.pyc new file mode 100644 index 0000000..4f988c7 Binary files /dev/null and b/code/lib/Bio/SearchIO/HmmerIO/__pycache__/hmmer3_tab.cpython-37.pyc differ diff --git a/code/lib/Bio/SearchIO/HmmerIO/__pycache__/hmmer3_text.cpython-37.pyc 
b/code/lib/Bio/SearchIO/HmmerIO/__pycache__/hmmer3_text.cpython-37.pyc new file mode 100644 index 0000000..f47f107 Binary files /dev/null and b/code/lib/Bio/SearchIO/HmmerIO/__pycache__/hmmer3_text.cpython-37.pyc differ diff --git a/code/lib/Bio/SearchIO/HmmerIO/_base.py b/code/lib/Bio/SearchIO/HmmerIO/_base.py new file mode 100644 index 0000000..3c20ad7 --- /dev/null +++ b/code/lib/Bio/SearchIO/HmmerIO/_base.py @@ -0,0 +1,45 @@ +# Copyright 2012 by Wibowo Arindrarto. All rights reserved. +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +"""Bio.SearchIO base classes for HMMER-related code.""" + +from Bio.SearchIO._index import SearchIndexer + + +class _BaseHmmerTextIndexer(SearchIndexer): + """Base indexer class for HMMER plain text output.""" + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._preamble = b"" + + def get_raw(self, offset): + """Return the raw record from the file as a bytes string.""" + handle = self._handle + qresult_raw = b"" + + # read header first + if not self._preamble: + handle.seek(0) + while True: + line = handle.readline() + if line.startswith(self.qresult_start): + break + qresult_raw += line + else: + qresult_raw += self._preamble + + # and read the qresult raw string + handle.seek(offset) + while True: + # preserve whitespace, don't use read_forward + line = handle.readline() + qresult_raw += line + + # break when we've reached qresult end + if line.startswith(self.qresult_end) or not line: + break + + return qresult_raw diff --git a/code/lib/Bio/SearchIO/HmmerIO/hmmer2_text.py b/code/lib/Bio/SearchIO/HmmerIO/hmmer2_text.py new file mode 100644 index 0000000..f28ec22 --- /dev/null +++ b/code/lib/Bio/SearchIO/HmmerIO/hmmer2_text.py @@ -0,0 +1,374 @@ +# Copyright 2012 by Kai Blin. All rights reserved. +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. 
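The `get_raw` scaffolding in `_base.py` above is what the format-specific indexers in this package build on: each subclass records a byte offset per query, and `get_raw` replays the preamble plus that query's block on demand. A rough usage sketch of how this surfaces through the public API (the file name and query key are hypothetical):

```python
from Bio import SearchIO

# Index a large HMMER plain-text report; queries are located, not parsed.
idx = SearchIO.index("hmmer_report.txt", "hmmer3-text")

qresult = idx["query1"]      # parsed lazily from its recorded offset
raw = idx.get_raw("query1")  # preamble + raw query block, as bytes
idx.close()
```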
+"""Bio.SearchIO parser for HMMER 2 text output.""" + +import re + +from Bio.SearchIO._utils import read_forward +from Bio.SearchIO._model import QueryResult, Hit, HSP, HSPFragment + +from ._base import _BaseHmmerTextIndexer + +__all__ = ("Hmmer2TextParser", "Hmmer2TextIndexer") + + +_HSP_ALIGN_LINE = re.compile(r"(\S+):\s+domain (\d+) of (\d+)") + + +class _HitPlaceholder: + def createHit(self, hsp_list): + hit = Hit(hsp_list) + hit.id_ = self.id_ + hit.evalue = self.evalue + hit.bitscore = self.bitscore + if self.description: + hit.description = self.description + hit.domain_obs_num = self.domain_obs_num + return hit + + +class Hmmer2TextParser: + """Iterator for the HMMER 2.0 text output.""" + + def __init__(self, handle): + """Initialize the class.""" + self.handle = handle + self.buf = [] + self._meta = self.parse_preamble() + + def __iter__(self): + """Iterate over Hmmer2TextParser, yields query results.""" + for qresult in self.parse_qresult(): + qresult.program = self._meta.get("program") + qresult.target = self._meta.get("target") + qresult.version = self._meta.get("version") + yield qresult + + def read_next(self, rstrip=True): + """Return the next non-empty line, trailing whitespace removed.""" + if len(self.buf) > 0: + return self.buf.pop() + self.line = self.handle.readline() + while self.line and rstrip and not self.line.strip(): + self.line = self.handle.readline() + if self.line: + if rstrip: + self.line = self.line.rstrip() + return self.line + + def push_back(self, line): + """Un-read a line that should not be parsed yet.""" + self.buf.append(line) + + def parse_key_value(self): + """Parse key-value pair separated by colon.""" + key, value = self.line.split(":", 1) + return key.strip(), value.strip() + + def parse_preamble(self): + """Parse HMMER2 preamble.""" + meta = {} + state = "GENERIC" + while self.read_next(): + if state == "GENERIC": + if self.line.startswith("hmm"): + meta["program"] = self.line.split("-")[0].strip() + elif self.line.startswith("HMMER is"): + continue + elif self.line.startswith("HMMER"): + meta["version"] = self.line.split()[1] + elif self.line.count("-") == 36: + state = "OPTIONS" + continue + + assert state == "OPTIONS" + assert "program" in meta + + if self.line.count("-") == 32: + break + + key, value = self.parse_key_value() + if meta["program"] == "hmmsearch": + if key == "Sequence database": + meta["target"] = value + continue + elif meta["program"] == "hmmpfam": + if key == "HMM file": + meta["target"] = value + continue + meta[key] = value + + return meta + + def parse_qresult(self): + """Parse a HMMER2 query block.""" + while self.read_next(): + if not self.line.startswith("Query"): + return + _, id_ = self.parse_key_value() + self.qresult = QueryResult(id=id_) + + description = None + + while self.read_next() and not self.line.startswith("Scores"): + if self.line.startswith("Accession"): + self.qresult.accession = self.parse_key_value()[1] + if self.line.startswith("Description"): + description = self.parse_key_value()[1] + + hit_placeholders = self.parse_hits() + if len(hit_placeholders) > 0: + self.parse_hsps(hit_placeholders) + self.parse_hsp_alignments() + + while not self.line.startswith("Query"): + self.read_next() + if not self.line: + break + self.buf.append(self.line) + + if description is not None: + self.qresult.description = description + yield self.qresult + + def parse_hits(self): + """Parse a HMMER2 hit block, beginning with the hit table.""" + hit_placeholders = [] + while self.read_next(): + if 
self.line.startswith("Parsed"): + break + if self.line.find("no hits") > -1: + break + + if ( + self.line.startswith("Sequence") + or self.line.startswith("Model") + or self.line.startswith("-------- ") + ): + continue + + fields = self.line.split() + id_ = fields.pop(0) + domain_obs_num = int(fields.pop()) + evalue = float(fields.pop()) + bitscore = float(fields.pop()) + description = " ".join(fields).strip() + + hit = _HitPlaceholder() + hit.id_ = id_ + hit.evalue = evalue + hit.bitscore = bitscore + hit.description = description + hit.domain_obs_num = domain_obs_num + hit_placeholders.append(hit) + + return hit_placeholders + + def parse_hsps(self, hit_placeholders): + """Parse a HMMER2 hsp block, beginning with the hsp table.""" + # HSPs may occur in different order than the hits + # so store Hit objects separately first + unordered_hits = {} + while self.read_next(): + if ( + self.line.startswith("Alignments") + or self.line.startswith("Histogram") + or self.line == "//" + ): + break + if ( + self.line.startswith("Model") + or self.line.startswith("Sequence") + or self.line.startswith("--------") + ): + continue + + ( + id_, + domain, + seq_f, + seq_t, + seq_compl, + hmm_f, + hmm_t, + hmm_compl, + score, + evalue, + ) = self.line.split() + + frag = HSPFragment(id_, self.qresult.id) + frag.molecule_type = "protein" + if self._meta["program"] == "hmmpfam": + frag.hit_start = int(hmm_f) - 1 + frag.hit_end = int(hmm_t) + frag.query_start = int(seq_f) - 1 + frag.query_end = int(seq_t) + elif self._meta["program"] == "hmmsearch": + frag.query_start = int(hmm_f) - 1 + frag.query_end = int(hmm_t) + frag.hit_start = int(seq_f) - 1 + frag.hit_end = int(seq_t) + + hsp = HSP([frag]) + hsp.evalue = float(evalue) + hsp.bitscore = float(score) + hsp.domain_index = int(domain.split("/")[0]) + if self._meta["program"] == "hmmpfam": + hsp.hit_endtype = hmm_compl + hsp.query_endtype = seq_compl + elif self._meta["program"] == "hmmsearch": + hsp.query_endtype = hmm_compl + hsp.hit_endtype = seq_compl + + if id_ not in unordered_hits: + placeholder = [p for p in hit_placeholders if p.id_ == id_][0] + hit = placeholder.createHit([hsp]) + unordered_hits[id_] = hit + else: + hit = unordered_hits[id_] + hsp.hit_description = hit.description + hit.append(hsp) + + # The placeholder list is in the correct order, so use that order for + # the Hit objects in the qresult + for p in hit_placeholders: + self.qresult.append(unordered_hits[p.id_]) + + def parse_hsp_alignments(self): + """Parse a HMMER2 HSP alignment block.""" + if not self.line.startswith("Alignments"): + return + + while self.read_next(): + if self.line == "//" or self.line.startswith("Histogram"): + break + + match = re.search(_HSP_ALIGN_LINE, self.line) + if match is None: + continue + + id_ = match.group(1) + idx = int(match.group(2)) + num = int(match.group(3)) + + hit = self.qresult[id_] + if hit.domain_obs_num != num: + continue + + frag = hit[idx - 1][0] + + hmmseq = "" + consensus = "" + otherseq = "" + structureseq = "" + pad = 0 + while self.read_next() and self.line.startswith(" "): + # if there's structure information, parse that + if self.line[16:18] == "CS": + structureseq += self.line[19:].strip() + + if not self.read_next(): + break + + # skip the *-> start marker if it exists + if self.line[19:22] == "*->": + seq = self.line[22:] + pad = 3 + else: + seq = self.line[19:] + pad = 0 + + hmmseq += seq + line_len = len(seq) + if not self.read_next(rstrip=False): + break + consensus += self.line[19 + pad : 19 + pad + line_len] + # If 
there's no consensus sequence, hmmer2 doesn't + # bother to put spaces here, so add extra padding + extra_padding = len(hmmseq) - len(consensus) + consensus += " " * extra_padding + + if not self.read_next(): + break + + # if we have a line break in the end marker, we get a + # whitespace-only otherseq line, making split()[0] return + # the end coordinate. That'll be a -, which is a valid character + # in the sequence, meaning we can't just strip it. + parts = self.line[19:].split() + if len(parts) == 2: + otherseq += self.line[19:].split()[0].strip() + + self.push_back(self.line) + + # get rid of the end marker + if hmmseq.endswith("<-*"): + hmmseq = hmmseq[:-3] + consensus = consensus[:-3] + + # add similarity sequence to annotation + frag.aln_annotation["similarity"] = consensus + + # if there's structure information, add it to the fragment + if structureseq: + frag.aln_annotation["CS"] = structureseq + + if self._meta["program"] == "hmmpfam": + frag.hit = hmmseq + frag.query = otherseq + else: + frag.hit = otherseq + frag.query = hmmseq + + +class Hmmer2TextIndexer(_BaseHmmerTextIndexer): + """Indexer for hmmer2-text format.""" + + _parser = Hmmer2TextParser + qresult_start = b"Query" + # qresults_ends for hmmpfam and hmmsearch + # need to anticipate both since hmmsearch have different query end mark + qresult_end = b"//" + + def __iter__(self): + """Iterate over Hmmer2TextIndexer; yields query results' key, offsets, 0.""" + handle = self._handle + handle.seek(0) + start_offset = handle.tell() + regex_id = re.compile(br"Query\s*(?:sequence|HMM)?:\s*(.*)") + + # determine flag for hmmsearch + is_hmmsearch = False + line = read_forward(handle) + if line.startswith(b"hmmsearch"): + is_hmmsearch = True + + while True: + end_offset = handle.tell() + + if line.startswith(self.qresult_start): + regx = re.search(regex_id, line) + qresult_key = regx.group(1).strip() + # qresult start offset is the offset of this line + # (starts with the start mark) + start_offset = end_offset - len(line) + elif line.startswith(self.qresult_end): + yield qresult_key.decode(), start_offset, 0 + start_offset = end_offset + elif not line: + # HACK: since hmmsearch can only have one query result + if is_hmmsearch: + yield qresult_key.decode(), start_offset, 0 + break + + line = read_forward(handle) + + +# if not used as a module, run the doctest +if __name__ == "__main__": + from Bio._utils import run_doctest + + run_doctest() diff --git a/code/lib/Bio/SearchIO/HmmerIO/hmmer3_domtab.py b/code/lib/Bio/SearchIO/HmmerIO/hmmer3_domtab.py new file mode 100644 index 0000000..514106a --- /dev/null +++ b/code/lib/Bio/SearchIO/HmmerIO/hmmer3_domtab.py @@ -0,0 +1,375 @@ +# Copyright 2012 by Wibowo Arindrarto. All rights reserved. +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. 
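With the HMMER2 parser and indexer in place, hmmpfam/hmmsearch 2.x reports can be walked query by query through the top-level SearchIO API. A minimal sketch, assuming a report file named as below:

```python
from Bio import SearchIO

# Each iteration yields one QueryResult; program, version, and target
# are filled in from the report's preamble.
for qresult in SearchIO.parse("hmmpfam.out", "hmmer2-text"):
    print(qresult.id, qresult.program, qresult.version)
    for hit in qresult:
        print(" ", hit.id, hit.evalue, hit.bitscore, hit.domain_obs_num)
```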
+"""Bio.SearchIO parser for HMMER domain table output format.""" + +from itertools import chain + +from Bio.SearchIO._model import QueryResult, Hit, HSP, HSPFragment + +from .hmmer3_tab import Hmmer3TabParser, Hmmer3TabIndexer + +__all__ = ( + "Hmmer3DomtabHmmhitParser", + "Hmmer3DomtabHmmqueryParser", + "Hmmer3DomtabHmmhitIndexer", + "Hmmer3DomtabHmmqueryIndexer", + "Hmmer3DomtabHmmhitWriter", + "Hmmer3DomtabHmmqueryWriter", +) + + +class Hmmer3DomtabParser(Hmmer3TabParser): + """Base hmmer3-domtab iterator.""" + + def _parse_row(self): + """Return a dictionary of parsed row values (PRIVATE).""" + assert self.line + cols = [x for x in self.line.strip().split(" ") if x] + # if len(cols) > 23, we have extra description columns + # combine them all into one string in the 19th column + if len(cols) > 23: + cols[22] = " ".join(cols[22:]) + elif len(cols) < 23: + cols.append("") + assert len(cols) == 23 + + # assign parsed column data into qresult, hit, and hsp dicts + qresult = {} + qresult["id"] = cols[3] # query name + qresult["accession"] = cols[4] # query accession + qresult["seq_len"] = int(cols[5]) # qlen + hit = {} + hit["id"] = cols[0] # target name + hit["accession"] = cols[1] # target accession + hit["seq_len"] = int(cols[2]) # tlen + hit["evalue"] = float(cols[6]) # evalue + hit["bitscore"] = float(cols[7]) # score + hit["bias"] = float(cols[8]) # bias + hit["description"] = cols[22] # description of target + hsp = {} + hsp["domain_index"] = int(cols[9]) # # (domain number) + # not parsing cols[10] since it's basically len(hit) + hsp["evalue_cond"] = float(cols[11]) # c-evalue + hsp["evalue"] = float(cols[12]) # i-evalue + hsp["bitscore"] = float(cols[13]) # score + hsp["bias"] = float(cols[14]) # bias + hsp["env_start"] = int(cols[19]) - 1 # env from + hsp["env_end"] = int(cols[20]) # env to + hsp["acc_avg"] = float(cols[21]) # acc + frag = {} + # strand is always 0, since HMMER now only handles protein + frag["hit_strand"] = frag["query_strand"] = 0 + frag["hit_start"] = int(cols[15]) - 1 # hmm from + frag["hit_end"] = int(cols[16]) # hmm to + frag["query_start"] = int(cols[17]) - 1 # ali from + frag["query_end"] = int(cols[18]) # ali to + # HMMER results are always protein + frag["molecule_type"] = "protein" + + # switch hmm<-->ali coordinates if hmm is not hit + if not self.hmm_as_hit: + frag["hit_end"], frag["query_end"] = (frag["query_end"], frag["hit_end"]) + frag["hit_start"], frag["query_start"] = ( + frag["query_start"], + frag["hit_start"], + ) + + return {"qresult": qresult, "hit": hit, "hsp": hsp, "frag": frag} + + def _parse_qresult(self): + """Return QueryResult objects (PRIVATE).""" + # state values, determines what to do for each line + state_EOF = 0 + state_QRES_NEW = 1 + state_QRES_SAME = 3 + state_HIT_NEW = 2 + state_HIT_SAME = 4 + # dummies for initial states + qres_state = None + hit_state = None + file_state = None + # dummies for initial id caches + prev_qid = None + prev_hid = None + # dummies for initial parsed value containers + cur, prev = None, None + hit_list, hsp_list = [], [] + cur_qid = None + cur_hid = None + while True: + # store previous line's parsed values, for every line after the 1st + if cur is not None: + prev = cur + prev_qid = cur_qid + prev_hid = cur_hid + # only parse the line if it's not EOF + if self.line and not self.line.startswith("#"): + cur = self._parse_row() + cur_qid = cur["qresult"]["id"] + cur_hid = cur["hit"]["id"] + else: + file_state = state_EOF + # mock ID values since the line is empty + cur_qid, cur_hid = None, None + + 
# get the state of hit and qresult + if prev_qid != cur_qid: + qres_state = state_QRES_NEW + else: + qres_state = state_QRES_SAME + # new hits are hits with different ids or hits in a new qresult + if prev_hid != cur_hid or qres_state == state_QRES_NEW: + hit_state = state_HIT_NEW + else: + hit_state = state_HIT_SAME + + # start creating objects after the first line (i.e. prev is filled) + if prev is not None: + # each line is basically an HSP with one HSPFragment + frag = HSPFragment(prev_hid, prev_qid) + for attr, value in prev["frag"].items(): + setattr(frag, attr, value) + hsp = HSP([frag]) + for attr, value in prev["hsp"].items(): + setattr(hsp, attr, value) + hsp_list.append(hsp) + + # create hit object when we've finished parsing all its hsps + # i.e. when hit state is state_HIT_NEW + if hit_state == state_HIT_NEW: + hit = Hit(hsp_list) + for attr, value in prev["hit"].items(): + setattr(hit, attr, value) + hit_list.append(hit) + hsp_list = [] + + # create qresult and yield if we're at a new qresult or EOF + if qres_state == state_QRES_NEW or file_state == state_EOF: + qresult = QueryResult(hit_list, prev_qid) + for attr, value in prev["qresult"].items(): + setattr(qresult, attr, value) + yield qresult + # if current line is EOF, break + if file_state == state_EOF: + break + hit_list = [] + + self.line = self.handle.readline() + + +class Hmmer3DomtabHmmhitParser(Hmmer3DomtabParser): + """HMMER domain table parser using hit coordinates. + + Parser for the HMMER domain table format that assumes HMM profile + coordinates are hit coordinates. + """ + + hmm_as_hit = True + + +class Hmmer3DomtabHmmqueryParser(Hmmer3DomtabParser): + """HMMER domain table parser using query coordinates. + + Parser for the HMMER domain table format that assumes HMM profile + coordinates are query coordinates. + """ + + hmm_as_hit = False + + +class Hmmer3DomtabHmmhitIndexer(Hmmer3TabIndexer): + """HMMER domain table indexer using hit coordinates. + + Indexer class for HMMER domain table output that assumes HMM profile + coordinates are hit coordinates. + """ + + _parser = Hmmer3DomtabHmmhitParser + _query_id_idx = 3 + + +class Hmmer3DomtabHmmqueryIndexer(Hmmer3TabIndexer): + """HMMER domain table indexer using query coordinates. + + Indexer class for HMMER domain table output that assumes HMM profile + coordinates are query coordinates. + """ + + _parser = Hmmer3DomtabHmmqueryParser + _query_id_idx = 3 + + +class Hmmer3DomtabHmmhitWriter: + """HMMER domain table writer using hit coordinates. + + Writer for hmmer3-domtab output format which writes hit coordinates + as HMM profile coordinates. + """ + + hmm_as_hit = True + + def __init__(self, handle): + """Initialize the class.""" + self.handle = handle + + def write_file(self, qresults): + """Write to the handle. + + Returns a tuple of how many QueryResult, Hit, and HSP objects were written. 
+ + """ + handle = self.handle + qresult_counter, hit_counter, hsp_counter, frag_counter = 0, 0, 0, 0 + + try: + first_qresult = next(qresults) + except StopIteration: + handle.write(self._build_header()) + else: + # write header + handle.write(self._build_header(first_qresult)) + # and then the qresults + for qresult in chain([first_qresult], qresults): + if qresult: + handle.write(self._build_row(qresult)) + qresult_counter += 1 + hit_counter += len(qresult) + hsp_counter += sum(len(hit) for hit in qresult) + frag_counter += sum(len(hit.fragments) for hit in qresult) + + return qresult_counter, hit_counter, hsp_counter, frag_counter + + def _build_header(self, first_qresult=None): + """Return the header string of a domain HMMER table output (PRIVATE).""" + # calculate whitespace required + # adapted from HMMER's source: src/p7_tophits.c#L1157 + if first_qresult: + # qnamew = max(20, len(first_qresult.id)) + qnamew = 20 + tnamew = max(20, len(first_qresult[0].id)) + try: + qaccw = max(10, len(first_qresult.acc)) + taccw = max(10, len(first_qresult[0].acc)) + except AttributeError: + qaccw, taccw = 10, 10 + else: + qnamew, tnamew, qaccw, taccw = 20, 20, 10, 10 + # Turn black code style off + # fmt: off + header = ("#%*s %22s %40s %11s %11s %11s\n" + % (tnamew + qnamew - 1 + 15 + taccw + qaccw, "", "--- full sequence ---", + "-------------- this domain -------------", "hmm coord", + "ali coord", "env coord")) + header += ("#%-*s %-*s %5s %-*s %-*s %5s %9s %6s %5s %3s %3s %9s " + "%9s %6s %5s %5s %5s %5s %5s %5s %5s %4s %s\n" + % (tnamew - 1, + " target name", taccw, "accession", "tlen", qnamew, + "query name", qaccw, "accession", "qlen", "E-value", "score", + "bias", "#", "of", "c-Evalue", "i-Evalue", "score", "bias", + "from", "to", "from", "to", "from", "to", "acc", + "description of target")) + header += ("#%*s %*s %5s %*s %*s %5s %9s %6s %5s %3s %3s %9s %9s " + "%6s %5s %5s %5s %5s %5s %5s %5s %4s %s\n" + % (tnamew - 1, + "-------------------", taccw, "----------", "-----", + qnamew, "--------------------", qaccw, "----------", + "-----", "---------", "------", "-----", "---", "---", + "---------", "---------", "------", "-----", "-----", "-----", + "-----", "-----", "-----", "-----", "----", + "---------------------")) + # Turn black code style on + # fmt: on + return header + + def _build_row(self, qresult): + """Return a string or one row or more of the QueryResult object (PRIVATE).""" + rows = "" + + # calculate whitespace required + # adapted from HMMER's source: src/p7_tophits.c#L1083 + qnamew = max(20, len(qresult.id)) + tnamew = max(20, len(qresult[0].id)) + try: + qaccw = max(10, len(qresult.accession)) + taccw = max(10, len(qresult[0].accession)) + qresult_acc = qresult.accession + except AttributeError: + qaccw, taccw = 10, 10 + qresult_acc = "-" + + for hit in qresult: + + # try to get hit accession + try: + hit_acc = hit.accession + except AttributeError: + hit_acc = "-" + + for hsp in hit.hsps: + if self.hmm_as_hit: + hmm_to = hsp.hit_end + hmm_from = hsp.hit_start + 1 + ali_to = hsp.query_end + ali_from = hsp.query_start + 1 + else: + hmm_to = hsp.query_end + hmm_from = hsp.query_start + 1 + ali_to = hsp.hit_end + ali_from = hsp.hit_start + 1 + + rows += ( + "%-*s %-*s %5d %-*s %-*s %5d %9.2g %6.1f %5.1f %3d" + " %3d %9.2g %9.2g %6.1f %5.1f %5d %5d %5ld %5ld" + " %5d %5d %4.2f %s\n" + % ( + tnamew, + hit.id, + taccw, + hit_acc, + hit.seq_len, + qnamew, + qresult.id, + qaccw, + qresult_acc, + qresult.seq_len, + hit.evalue, + hit.bitscore, + hit.bias, + hsp.domain_index, + 
len(hit.hsps), + hsp.evalue_cond, + hsp.evalue, + hsp.bitscore, + hsp.bias, + hmm_from, + hmm_to, + ali_from, + ali_to, + hsp.env_start + 1, + hsp.env_end, + hsp.acc_avg, + hit.description, + ) + ) + + return rows + + +class Hmmer3DomtabHmmqueryWriter(Hmmer3DomtabHmmhitWriter): + """HMMER domain table writer using query coordinates. + + Writer for hmmer3-domtab output format which writes query coordinates + as HMM profile coordinates. + """ + + hmm_as_hit = False + + +# if not used as a module, run the doctest +if __name__ == "__main__": + from Bio._utils import run_doctest + + run_doctest() diff --git a/code/lib/Bio/SearchIO/HmmerIO/hmmer3_tab.py b/code/lib/Bio/SearchIO/HmmerIO/hmmer3_tab.py new file mode 100644 index 0000000..a380732 --- /dev/null +++ b/code/lib/Bio/SearchIO/HmmerIO/hmmer3_tab.py @@ -0,0 +1,335 @@ +# Copyright 2012 by Wibowo Arindrarto. All rights reserved. +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +"""Bio.SearchIO parser for HMMER table output format.""" + +from itertools import chain + +from Bio.SearchIO._index import SearchIndexer +from Bio.SearchIO._model import QueryResult, Hit, HSP, HSPFragment + + +__all__ = ("Hmmer3TabParser", "Hmmer3TabIndexer", "Hmmer3TabWriter") + + +class Hmmer3TabParser: + """Parser for the HMMER table format.""" + + def __init__(self, handle): + """Initialize the class.""" + self.handle = handle + self.line = self.handle.readline() + + def __iter__(self): + """Iterate over Hmmer3TabParser, yields query results.""" + header_mark = "#" + # read through the header if it exists + while self.line.startswith(header_mark): + self.line = self.handle.readline() + # if we have result rows, parse it + if self.line: + yield from self._parse_qresult() + + def _parse_row(self): + """Return a dictionary of parsed row values (PRIVATE).""" + cols = [x for x in self.line.strip().split(" ") if x] + if len(cols) < 18: + raise ValueError("Less columns than expected, only %i" % len(cols)) + # if len(cols) > 19, we have extra description columns + # combine them all into one string in the 19th column + cols[18] = " ".join(cols[18:]) + + # assign parsed column data into qresult, hit, and hsp dicts + qresult = {} + qresult["id"] = cols[2] # query name + qresult["accession"] = cols[3] # query accession + hit = {} + hit["id"] = cols[0] # target name + hit["accession"] = cols[1] # target accession + hit["evalue"] = float(cols[4]) # evalue (full sequence) + hit["bitscore"] = float(cols[5]) # score (full sequence) + hit["bias"] = float(cols[6]) # bias (full sequence) + hit["domain_exp_num"] = float(cols[10]) # exp + hit["region_num"] = int(cols[11]) # reg + hit["cluster_num"] = int(cols[12]) # clu + hit["overlap_num"] = int(cols[13]) # ov + hit["env_num"] = int(cols[14]) # env + hit["domain_obs_num"] = int(cols[15]) # dom + hit["domain_reported_num"] = int(cols[16]) # rep + hit["domain_included_num"] = int(cols[17]) # inc + hit["description"] = cols[18] # description of target + hsp = {} + hsp["evalue"] = float(cols[7]) # evalue (best 1 domain) + hsp["bitscore"] = float(cols[8]) # score (best 1 domain) + hsp["bias"] = float(cols[9]) # bias (best 1 domain) + # strand is always 0, since HMMER now only handles protein + frag = {} + frag["hit_strand"] = frag["query_strand"] = 0 + frag["molecule_type"] = "protein" + + return {"qresult": qresult, "hit": hit, "hsp": hsp, "frag": 
frag} + + def _parse_qresult(self): + """Return QueryResult objects (PRIVATE).""" + # state values, determines what to do for each line + state_EOF = 0 + state_QRES_NEW = 1 + state_QRES_SAME = 3 + # initial value dummies + qres_state = None + file_state = None + prev_qid = None + cur, prev = None, None + # container for Hit objects, used to create QueryResult + hit_list = [] + cur_qid = None + while True: + # store previous line's parsed values for all lines after the first + if cur is not None: + prev = cur + prev_qid = cur_qid + # only parse the result row if it's not EOF + # NOTE: we are not parsing the extra '#' lines appended to the end + # of hmmer31b1 tabular results since storing them in qresult + # objects means we can not do a single-pass parsing + if self.line and not self.line.startswith("#"): + cur = self._parse_row() + cur_qid = cur["qresult"]["id"] + else: + file_state = state_EOF + # mock value for cur_qid, since we have nothing to parse + cur_qid = None + + if prev_qid != cur_qid: + qres_state = state_QRES_NEW + else: + qres_state = state_QRES_SAME + + if prev is not None: + # since domain tab formats only have 1 Hit per line + # we always create HSPFragment, HSP, and Hit per line + prev_hid = prev["hit"]["id"] + + # create fragment and HSP and set their attributes + frag = HSPFragment(prev_hid, prev_qid) + for attr, value in prev["frag"].items(): + setattr(frag, attr, value) + hsp = HSP([frag]) + for attr, value in prev["hsp"].items(): + setattr(hsp, attr, value) + + # create Hit and set its attributes + hit = Hit([hsp]) + for attr, value in prev["hit"].items(): + setattr(hit, attr, value) + hit_list.append(hit) + + # create qresult and yield if we're at a new qresult or at EOF + if qres_state == state_QRES_NEW or file_state == state_EOF: + qresult = QueryResult(hit_list, prev_qid) + for attr, value in prev["qresult"].items(): + setattr(qresult, attr, value) + yield qresult + # if we're at EOF, break + if file_state == state_EOF: + break + hit_list = [] + + self.line = self.handle.readline() + + +class Hmmer3TabIndexer(SearchIndexer): + """Indexer class for HMMER table output.""" + + _parser = Hmmer3TabParser + # denotes column location for query identifier + _query_id_idx = 2 + + def __iter__(self): + """Iterate over the file handle; yields key, start offset, and length.""" + handle = self._handle + handle.seek(0) + query_id_idx = self._query_id_idx + qresult_key = None + header_mark = b"#" + split_mark = b" " + # set line with initial mock value, to emulate header + line = header_mark + + # read through header + while line.startswith(header_mark): + start_offset = handle.tell() + line = handle.readline() + + # and index the qresults + while True: + end_offset = handle.tell() + + if not line: + break + + cols = [x for x in line.strip().split(split_mark) if x] + if qresult_key is None: + qresult_key = cols[query_id_idx] + else: + curr_key = cols[query_id_idx] + + if curr_key != qresult_key: + adj_end = end_offset - len(line) + yield (qresult_key.decode(), start_offset, adj_end - start_offset) + qresult_key = curr_key + start_offset = adj_end + + line = handle.readline() + if not line: + yield (qresult_key.decode(), start_offset, end_offset - start_offset) + break + + def get_raw(self, offset): + """Return the raw bytes string of a QueryResult object from the given offset.""" + handle = self._handle + handle.seek(offset) + query_id_idx = self._query_id_idx + qresult_key = None + qresult_raw = b"" + split_mark = b" " + + while True: + line = handle.readline() + if not line: 
+ break + cols = [x for x in line.strip().split(split_mark) if x] + if qresult_key is None: + qresult_key = cols[query_id_idx] + else: + curr_key = cols[query_id_idx] + if curr_key != qresult_key: + break + qresult_raw += line + + return qresult_raw + + +class Hmmer3TabWriter: + """Writer for hmmer3-tab output format.""" + + def __init__(self, handle): + """Initialize the class.""" + self.handle = handle + + def write_file(self, qresults): + """Write to the handle. + + Returns a tuple of how many QueryResult, Hit, and HSP objects were written. + + """ + handle = self.handle + qresult_counter, hit_counter, hsp_counter, frag_counter = 0, 0, 0, 0 + + try: + first_qresult = next(qresults) + except StopIteration: + handle.write(self._build_header()) + else: + # write header + handle.write(self._build_header(first_qresult)) + # and then the qresults + for qresult in chain([first_qresult], qresults): + if qresult: + handle.write(self._build_row(qresult)) + qresult_counter += 1 + hit_counter += len(qresult) + hsp_counter += sum(len(hit) for hit in qresult) + frag_counter += sum(len(hit.fragments) for hit in qresult) + + return qresult_counter, hit_counter, hsp_counter, frag_counter + + def _build_header(self, first_qresult=None): + """Return the header string of a HMMER table output (PRIVATE).""" + # calculate whitespace required + # adapted from HMMER's source: src/p7_tophits.c#L1083 + if first_qresult is not None: + # qnamew = max(20, len(first_qresult.id)) + qnamew = 20 # why doesn't the above work? + tnamew = max(20, len(first_qresult[0].id)) + qaccw = max(10, len(first_qresult.accession)) + taccw = max(10, len(first_qresult[0].accession)) + else: + qnamew, tnamew, qaccw, taccw = 20, 20, 10, 10 + # Turn black code style off + # fmt: off + header = ("#%*s %22s %22s %33s\n" + % (tnamew + qnamew + taccw + qaccw + 2, "", + "--- full sequence ----", "--- best 1 domain ----", + "--- domain number estimation ----")) + header += ("#%-*s %-*s %-*s %-*s %9s %6s %5s %9s %6s %5s %5s %3s " + "%3s %3s %3s %3s %3s %3s %s\n" + % (tnamew - 1, " target name", + taccw, "accession", qnamew, "query name", qaccw, + "accession", " E-value", " score", " bias", + " E-value", " score", " bias", "exp", + "reg", "clu", " ov", "env", "dom", "rep", + "inc", "description of target")) + header += ("#%*s %*s %*s %*s %9s %6s %5s %9s %6s %5s %5s %3s %3s " + "%3s %3s %3s %3s %3s %s\n" + % (tnamew - 1, "-------------------", + taccw, "----------", qnamew, "--------------------", qaccw, + "----------", "---------", "------", "-----", "---------", + "------", "-----", "---", "---", "---", "---", "---", "---", + "---", "---", "---------------------")) + # Turn black code style on + # fmt: on + return header + + def _build_row(self, qresult): + """Return a string or one row or more of the QueryResult object (PRIVATE).""" + rows = "" + + # calculate whitespace required + # adapted from HMMER's source: src/p7_tophits.c#L1083 + qnamew = max(20, len(qresult.id)) + tnamew = max(20, len(qresult[0].id)) + qaccw = max(10, len(qresult.accession)) + taccw = max(10, len(qresult[0].accession)) + + for hit in qresult: + rows += ( + "%-*s %-*s %-*s %-*s %9.2g %6.1f %5.1f %9.2g %6.1f" + " %5.1f %5.1f %3d %3d %3d %3d %3d %3d %3d %s\n" + % ( + tnamew, + hit.id, + taccw, + hit.accession, + qnamew, + qresult.id, + qaccw, + qresult.accession, + hit.evalue, + hit.bitscore, + hit.bias, + hit.hsps[0].evalue, + hit.hsps[0].bitscore, + hit.hsps[0].bias, + hit.domain_exp_num, + hit.region_num, + hit.cluster_num, + hit.overlap_num, + hit.env_num, + 
hit.domain_obs_num, + hit.domain_reported_num, + hit.domain_included_num, + hit.description, + ) + ) + + return rows + + +# if not used as a module, run the doctest +if __name__ == "__main__": + from Bio._utils import run_doctest + + run_doctest() diff --git a/code/lib/Bio/SearchIO/HmmerIO/hmmer3_text.py b/code/lib/Bio/SearchIO/HmmerIO/hmmer3_text.py new file mode 100644 index 0000000..9cc087c --- /dev/null +++ b/code/lib/Bio/SearchIO/HmmerIO/hmmer3_text.py @@ -0,0 +1,436 @@ +# Copyright 2012 by Wibowo Arindrarto. All rights reserved. +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +"""Bio.SearchIO parser for HMMER plain text output format.""" + +import re + +from Bio.SearchIO._utils import read_forward +from Bio.SearchIO._model import QueryResult, Hit, HSP, HSPFragment + +from ._base import _BaseHmmerTextIndexer + +__all__ = ("Hmmer3TextParser", "Hmmer3TextIndexer") + + +# precompile regex patterns for faster processing +# regex for program name capture +_RE_PROGRAM = re.compile(r"^# (\w*hmm\w+) :: .*$") +# regex for version string capture +_RE_VERSION = re.compile(r"# \w+ ([\w+\.]+) .*; http.*$") +# regex for option string capture +_RE_OPT = re.compile(r"^# (.+):\s+(.+)$") +# regex for parsing query id and length, for parsing +_QRE_ID_LEN_PTN = r"^Query:\s*(.*)\s+\[\w=(\d+)\]" +_QRE_ID_LEN = re.compile(_QRE_ID_LEN_PTN) +# regex for hsp validation +_HRE_VALIDATE = re.compile(r"score:\s(-?\d+\.?\d+)\sbits.*value:\s(.*)") +# regexes for parsing hsp alignment blocks +_HRE_ANNOT_LINE = re.compile(r"^(\s+)(.+)\s(\w+)") +_HRE_ID_LINE = re.compile(r"^(\s+\S+\s+[0-9-]+ )(.+?)(\s+[0-9-]+)") + + +class Hmmer3TextParser: + """Parser for the HMMER 3.0 text output.""" + + def __init__(self, handle): + """Initialize the class.""" + self.handle = handle + self.line = read_forward(self.handle) + self._meta = self._parse_preamble() + + def __iter__(self): + """Iterate over query results.""" + yield from self._parse_qresult() + + def _read_until(self, bool_func): + """Read the file handle until the given function returns True (PRIVATE).""" + while True: + if not self.line or bool_func(self.line): + return + else: + self.line = read_forward(self.handle) + + def _parse_preamble(self): + """Parse HMMER preamble (lines beginning with '#') (PRIVATE).""" + meta = {} + # bool flag for storing state ~ whether we are parsing the option + # lines or not + has_opts = False + while True: + # no pound sign means we've left the preamble + if not self.line.startswith("#"): + break + # dashes could either mean we are entering or leaving the options + # section ~ so it's a switch for the has_opts flag + elif "- - -" in self.line: + if not has_opts: + # if flag is false, that means we're entering opts + # so switch the flag accordingly + has_opts = True + else: + # if flag is true, that means we've reached the end of opts + # so we can break out of the function + break + elif not has_opts: + # try parsing program + regx = re.search(_RE_PROGRAM, self.line) + if regx: + meta["program"] = regx.group(1) + # try parsing version + regx = re.search(_RE_VERSION, self.line) + if regx: + meta["version"] = regx.group(1) + elif has_opts: + regx = re.search(_RE_OPT, self.line) + # if target in regx.group(1), then we store the key as target + if "target" in regx.group(1): + meta["target"] = regx.group(2).strip() + else: + meta[regx.group(1)] = 
regx.group(2) + + self.line = read_forward(self.handle) + + return meta + + def _parse_qresult(self): + """Parse a HMMER3 query block (PRIVATE).""" + self._read_until(lambda line: line.startswith("Query:")) + + while self.line: + + regx = re.search(_QRE_ID_LEN, self.line) + + while not regx: + self.line = read_forward(self.handle) + regx = re.search(_QRE_ID_LEN, self.line) + + # get query id and length + qid = regx.group(1).strip() + # store qresult attributes + qresult_attrs = { + "seq_len": int(regx.group(2)), + "program": self._meta.get("program"), + "version": self._meta.get("version"), + "target": self._meta.get("target"), + } + + # get description and accession, if they exist + qdesc = "" # placeholder + while not self.line.startswith("Scores for "): + self.line = read_forward(self.handle) + + if self.line.startswith("Accession:"): + acc = self.line.strip().split(" ", 1)[1] + qresult_attrs["accession"] = acc.strip() + elif self.line.startswith("Description:"): + qdesc = self.line.strip().split(" ", 1)[1].strip() + qresult_attrs["description"] = qdesc + + # parse the query hits + while self.line and "//" not in self.line: + hit_list = self._parse_hit(qid, qdesc) + # read through the statistics summary + # TODO: parse and store this information? + if self.line.startswith("Internal pipeline"): + while self.line and "//" not in self.line: + self.line = read_forward(self.handle) + + # create qresult, set its attributes and yield + # not initializing hit_list directly to handle empty hits + # (i.e. need to set its query description manually) + qresult = QueryResult(id=qid, hits=hit_list) + for attr, value in qresult_attrs.items(): + setattr(qresult, attr, value) + yield qresult + self.line = read_forward(self.handle) + + # Skip line beginning with '# Alignment of', which are output + # when running phmmer with the '-A' flag. + if self.line.startswith("#"): + self.line = self.handle.readline() + + # HMMER >= 3.1 outputs '[ok]' at the end of all results file, + # which means we can break the main loop when we see the line + if "[ok]" in self.line: + break + + def _parse_hit(self, qid, qdesc): + """Parse a HMMER3 hit block, beginning with the hit table (PRIVATE).""" + # get to the end of the hit table delimiter and read one more line + self._read_until(lambda line: line.startswith(" ------- ------ -----")) + self.line = read_forward(self.handle) + + # assume every hit is in inclusion threshold until the inclusion + # threshold line is encountered + is_included = True + + # parse the hit table + hit_attr_list = [] + while True: + if not self.line: + return [] + elif self.line.startswith(" ------ inclusion"): + is_included = False + self.line = read_forward(self.handle) + # if there are no hits, then there are no hsps + # so we forward-read until 'Internal pipeline..' 
+ elif self.line.startswith(" [No hits detected that satisfy reporting"): + while True: + self.line = read_forward(self.handle) + if self.line.startswith("Internal pipeline"): + assert len(hit_attr_list) == 0 + return [] + elif self.line.startswith("Domain annotation for each "): + hit_list = self._create_hits(hit_attr_list, qid, qdesc) + return hit_list + # entering hit results row + # parse the columns into a list + row = [x for x in self.line.strip().split(" ") if x] + # join the description words if it's >1 word + if len(row) > 10: + row[9] = " ".join(row[9:]) + # if there's no description, set it to an empty string + elif len(row) < 10: + row.append("") + assert len(row) == 10 + # create the hit object + hit_attrs = { + "id": row[8], + "query_id": qid, + "evalue": float(row[0]), + "bitscore": float(row[1]), + "bias": float(row[2]), + # row[3:6] is not parsed, since the info is available + # at the HSP level + "domain_exp_num": float(row[6]), + "domain_obs_num": int(row[7]), + "description": row[9], + "is_included": is_included, + } + hit_attr_list.append(hit_attrs) + + self.line = read_forward(self.handle) + + def _create_hits(self, hit_attrs, qid, qdesc): + """Parse a HMMER3 hsp block, beginning with the hsp table (PRIVATE).""" + # read through until the beginning of the hsp block + self._read_until( + lambda line: line.startswith("Internal pipeline") or line.startswith(">>") + ) + + # start parsing the hsp block + hit_list = [] + while True: + if self.line.startswith("Internal pipeline"): + # by this time we should've emptied the hit attr list + assert len(hit_attrs) == 0 + return hit_list + assert self.line.startswith(">>") + hid, hdesc = self.line[len(">> ") :].split(" ", 1) + hdesc = hdesc.strip() + + # read through the hsp table header and move one more line + self._read_until( + lambda line: line.startswith(" --- ------ ----- --------") + or line.startswith(" [No individual domains") + ) + self.line = read_forward(self.handle) + + # parse the hsp table for the current hit + hsp_list = [] + while True: + # break out of hsp parsing if there are no hits, it's the last hsp + # or it's the start of a new hit + if ( + self.line.startswith(" [No targets detected that satisfy") + or self.line.startswith(" [No individual domains") + or self.line.startswith("Internal pipeline statistics summary:") + or self.line.startswith(" Alignments for each domain:") + or self.line.startswith(">>") + ): + + hit_attr = hit_attrs.pop(0) + hit = Hit(hsp_list) + for attr, value in hit_attr.items(): + if attr == "description": + cur_val = getattr(hit, attr) + if cur_val and value and cur_val.startswith(value): + continue + setattr(hit, attr, value) + if not hit: + hit.query_description = qdesc + hit_list.append(hit) + break + + parsed = [x for x in self.line.strip().split(" ") if x] + assert len(parsed) == 16 + # parsed column order: + # index, is_included, bitscore, bias, evalue_cond, evalue + # hmmfrom, hmmto, query_ends, hit_ends, alifrom, alito, + # envfrom, envto, acc_avg + frag = HSPFragment(hid, qid) + # set query and hit descriptions if they are defined / nonempty string + if qdesc: + frag.query_description = qdesc + if hdesc: + frag.hit_description = hdesc + # HMMER3 results are always protein + frag.molecule_type = "protein" + # depending on whether the program is hmmsearch, hmmscan, or phmmer + # {hmm,ali}{from,to} can either be hit_{from,to} or query_{from,to} + # for hmmscan, hit is the hmm profile, query is the sequence + if self._meta.get("program") == "hmmscan": + # adjust 'from' and 'to' 
coordinates to 0-based ones + frag.hit_start = int(parsed[6]) - 1 + frag.hit_end = int(parsed[7]) + frag.query_start = int(parsed[9]) - 1 + frag.query_end = int(parsed[10]) + elif self._meta.get("program") in ["hmmsearch", "phmmer"]: + # adjust 'from' and 'to' coordinates to 0-based ones + frag.hit_start = int(parsed[9]) - 1 + frag.hit_end = int(parsed[10]) + frag.query_start = int(parsed[6]) - 1 + frag.query_end = int(parsed[7]) + # strand is always 0, since HMMER now only handles protein + frag.hit_strand = frag.query_strand = 0 + + hsp = HSP([frag]) + hsp.domain_index = int(parsed[0]) + hsp.is_included = parsed[1] == "!" + hsp.bitscore = float(parsed[2]) + hsp.bias = float(parsed[3]) + hsp.evalue_cond = float(parsed[4]) + hsp.evalue = float(parsed[5]) + if self._meta.get("program") == "hmmscan": + # adjust 'from' and 'to' coordinates to 0-based ones + hsp.hit_endtype = parsed[8] + hsp.query_endtype = parsed[11] + elif self._meta.get("program") in ["hmmsearch", "phmmer"]: + # adjust 'from' and 'to' coordinates to 0-based ones + hsp.hit_endtype = parsed[11] + hsp.query_endtype = parsed[8] + # adjust 'from' and 'to' coordinates to 0-based ones + hsp.env_start = int(parsed[12]) - 1 + hsp.env_end = int(parsed[13]) + hsp.env_endtype = parsed[14] + hsp.acc_avg = float(parsed[15]) + + hsp_list.append(hsp) + self.line = read_forward(self.handle) + + # parse the hsp alignments + if self.line.startswith(" Alignments for each domain:"): + self._parse_aln_block(hid, hit.hsps) + + def _parse_aln_block(self, hid, hsp_list): + """Parse a HMMER3 HSP alignment block (PRIVATE).""" + self.line = read_forward(self.handle) + dom_counter = 0 + while True: + if self.line.startswith(">>") or self.line.startswith("Internal pipeline"): + return hsp_list + assert self.line.startswith(" == domain %i" % (dom_counter + 1)) + # alias hsp to local var + # but note that we're still changing the attrs of the actual + # hsp inside the qresult as we're not creating a copy + frag = hsp_list[dom_counter][0] + # XXX: should we validate again here? regex is expensive.. + # regx = re.search(_HRE_VALIDATE, self.line) + # assert hsp.bitscore == float(regx.group(1)) + # assert hsp.evalue_cond == float(regx.group(2)) + hmmseq = "" + aliseq = "" + annot = {} + self.line = self.handle.readline() + + # parse all the alignment blocks in the hsp + while True: + + regx = None + + # check for hit or query line + # we don't check for the hit or query id specifically + # to anticipate special cases where query id == hit id + regx = re.search(_HRE_ID_LINE, self.line) + if regx: + # the first hit/query self.line we encounter is the hmmseq + if len(hmmseq) == len(aliseq): + hmmseq += regx.group(2) + # and for subsequent self.lines, len(hmmseq) is either + # > or == len(aliseq) + elif len(hmmseq) > len(aliseq): + aliseq += regx.group(2) + assert len(hmmseq) >= len(aliseq) + # check for start of new domain + elif ( + self.line.startswith(" == domain") + or self.line.startswith(">>") + or self.line.startswith("Internal pipeline") + ): + frag.aln_annotation = annot + if self._meta.get("program") == "hmmscan": + frag.hit = hmmseq + frag.query = aliseq + elif self._meta.get("program") in ["hmmsearch", "phmmer"]: + frag.hit = aliseq + frag.query = hmmseq + dom_counter += 1 + hmmseq = "" + aliseq = "" + annot = {} + break + # otherwise check if it's an annotation line and parse it + # len(hmmseq) is only != len(aliseq) when the cursor is parsing + # the similarity character. 
Since we're not parsing that, we + # check for when the condition is False (i.e. when it's ==) + elif len(hmmseq) == len(aliseq): + regx = re.search(_HRE_ANNOT_LINE, self.line) + if regx: + annot_name = regx.group(3) + if annot_name in annot: + annot[annot_name] += regx.group(2) + else: + annot[annot_name] = regx.group(2) + + self.line = self.handle.readline() + + +class Hmmer3TextIndexer(_BaseHmmerTextIndexer): + """Indexer class for HMMER plain text output.""" + + _parser = Hmmer3TextParser + qresult_start = b"Query: " + qresult_end = b"//" + + def __iter__(self): + """Iterate over Hmmer3TextIndexer; yields query results' key, offsets, 0.""" + handle = self._handle + handle.seek(0) + start_offset = handle.tell() + regex_id = re.compile(_QRE_ID_LEN_PTN.encode()) + + while True: + line = read_forward(handle) + end_offset = handle.tell() + + if line.startswith(self.qresult_start): + regx = re.search(regex_id, line) + qresult_key = regx.group(1).strip() + # qresult start offset is the offset of this line + # (starts with the start mark) + start_offset = end_offset - len(line) + elif line.startswith(self.qresult_end): + yield qresult_key.decode(), start_offset, 0 + start_offset = end_offset + elif not line: + break + + +# if not used as a module, run the doctest +if __name__ == "__main__": + from Bio._utils import run_doctest + + run_doctest() diff --git a/code/lib/Bio/SearchIO/InterproscanIO/__init__.py b/code/lib/Bio/SearchIO/InterproscanIO/__init__.py new file mode 100644 index 0000000..620a519 --- /dev/null +++ b/code/lib/Bio/SearchIO/InterproscanIO/__init__.py @@ -0,0 +1,96 @@ +# Copyright 2018 by Adhemar Zerlotini. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +"""Bio.SearchIO support for InterProScan output formats. + +This module adds support for parsing InterProScan XML output. +The InterProScan is available as a command line program or on +EMBL-EBI's web page. 
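As a quick orientation before the details below, the parser is reached through the usual SearchIO entry points with the 'interproscan-xml' format name; a minimal sketch (the input file name is hypothetical):

```python
from Bio import SearchIO

# One QueryResult per protein element, one Hit per matched signature.
for qresult in SearchIO.parse("scan_results.xml", "interproscan-xml"):
    for hit in qresult:
        print(hit.id, hit.attributes["Hit type"], hit.dbxrefs)
```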
+Bio.SearchIO.InterproscanIO was tested on the following version: + +- versions: 5.26-65.0 (interproscan-model-2.1.xsd) + +More information about InterProScan is available through these links: +- Publication: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3998142/ +- Web interface: https://www.ebi.ac.uk/interpro/search/sequence-search +- Documentation: https://github.com/ebi-pf-team/interproscan/wiki + + +Supported format +================ + +Bio.SearchIO.InterproscanIO supports the following format: + +- XML - 'interproscan-xml' - parsing + + +interproscan-xml +================ + +The interproscan-xml parser follows the InterProScan XML described here: +https://github.com/ebi-pf-team/interproscan/wiki/OutputFormats + ++--------------+--------------------+------------------------------------------+ +| Object | Attribute | XML Element | ++==============+====================+==========================================+ +| QueryResult | target | ``InterPro`` | +| +--------------------+------------------------------------------+ +| | program | ``InterProScan`` | +| +--------------------+------------------------------------------+ +| | version | ``protein-matches.interproscan-version`` | ++--------------+--------------------+------------------------------------------+ +| Hit | accession | ``signature.name`` | +| +--------------------+------------------------------------------+ +| | id | ``signature.ac`` | +| +--------------------+------------------------------------------+ +| | description | ``signature.desc`` | +| +--------------------+------------------------------------------+ +| | dbxrefs | ``IPR:entry.ac`` | +| | | ``go-xref.id`` | +| | | ``pathway-xref.db:pathway-xref.id`` | +| +--------------------+------------------------------------------+ +| | attributes | | +| | ['Target'] | ``signature-library-release.library`` | +| | ['Target version'] | ``signature-library-release.version`` | +| | ['Hit type'] | ``*-match`` / ``*-location`` | ++--------------+--------------------+------------------------------------------+ +| HSP | bitscore | ``*-location.score`` | +| +--------------------+------------------------------------------+ +| | evalue | ``*-location.evalue`` | ++--------------+--------------------+------------------------------------------+ +| HSPFragment | query_start | ``*-location.start`` | +| (also via +--------------------+------------------------------------------+ +| HSP) | query_end | ``*-location.end`` | +| +--------------------+------------------------------------------+ +| | hit_start | ``*-location.hmm-start`` | +| +--------------------+------------------------------------------+ +| | hit_end | ``*-location.hmm-end`` | +| +--------------------+------------------------------------------+ +| | query | ``sequence`` | ++--------------+--------------------+------------------------------------------+ + +InterProScan XML files may contain a match with multiple locations or multiple +matches to the same protein with a single location. In both cases, the match +is uniquely stored as a Hit object and the locations as HSP objects. + +``HSP.*start == *start - 1`` (Since every start position is 0-based in Biopython) + +``HSP.aln_span == query-end - query-start`` + +The types of matches or locations (e.g. hmmer3-match, hmmer3-location, +coils-match, panther-location) are stored in hit.attributes['Hit type']. +For instance, for every 'phobius-match', there will be a 'phobius-location'. 
+Therefore, hit.attributes['Hit type'] will store the string excluding '-match' or '-location' +('phobius', in this example). +""" + +from .interproscan_xml import InterproscanXmlParser + +# if not used as a module, run the doctest +if __name__ == "__main__": + from Bio._utils import run_doctest + + run_doctest() diff --git a/code/lib/Bio/SearchIO/InterproscanIO/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/SearchIO/InterproscanIO/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..8571d35 Binary files /dev/null and b/code/lib/Bio/SearchIO/InterproscanIO/__pycache__/__init__.cpython-37.pyc differ diff --git a/code/lib/Bio/SearchIO/InterproscanIO/__pycache__/interproscan_xml.cpython-37.pyc b/code/lib/Bio/SearchIO/InterproscanIO/__pycache__/interproscan_xml.cpython-37.pyc new file mode 100644 index 0000000..bde264a Binary files /dev/null and b/code/lib/Bio/SearchIO/InterproscanIO/__pycache__/interproscan_xml.cpython-37.pyc differ diff --git a/code/lib/Bio/SearchIO/InterproscanIO/interproscan_xml.py b/code/lib/Bio/SearchIO/InterproscanIO/interproscan_xml.py new file mode 100644 index 0000000..97625b0 --- /dev/null +++ b/code/lib/Bio/SearchIO/InterproscanIO/interproscan_xml.py @@ -0,0 +1,194 @@ +# Copyright 2018 by Adhemar Zerlotini. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. + +"""Bio.SearchIO parser for InterProScan XML output formats.""" +# for more info: https://github.com/ebi-pf-team/interproscan/wiki/OutputFormats + +import re +from xml.etree import ElementTree + +from Bio.SearchIO._model import QueryResult, Hit, HSP, HSPFragment + + +# element - hit attribute name mapping +_ELEM_HIT = { + "name": ("accession", str), + "ac": ("id", str), + "desc": ("description", str), +} +# element - hsp attribute name mapping +_ELEM_HSP = {"score": ("bitscore", float), "evalue": ("evalue", float)} +# element - fragment attribute name mapping +_ELEM_FRAG = { + "start": ("query_start", int), + "end": ("query_end", int), + "hmm-start": ("hit_start", int), + "hmm-end": ("hit_end", int), +} + + +class InterproscanXmlParser: + """Parser for the InterProScan XML format.""" + + def __init__(self, handle): + """Initialize the class.""" + self.xml_iter = iter(ElementTree.iterparse(handle, events=("start", "end"))) + self._meta = self._parse_header() + + def __iter__(self): + """Iterate qresults.""" + yield from self._parse_qresult() + + def _parse_header(self): + """Parse the header for the InterProScan version (PRIVATE).""" + event, elem = next(self.xml_iter) + meta = {} + meta["target"] = "InterPro" + meta["program"] = "InterProScan" + meta["version"] = elem.attrib["interproscan-version"] + # store the namespace value + self.NS = re.sub("protein-matches", "", elem.tag) + return meta + + def _parse_qresult(self): + """Parse query results (PRIVATE).""" + for event, elem in self.xml_iter: + if event == "end" and elem.tag == self.NS + "protein": + # store the query sequence + seq = elem.find(self.NS + "sequence") + query_seq = seq.text + + # store the query id and description + xref = elem.find(self.NS + "xref") + query_id = xref.attrib["id"] + query_desc = xref.attrib["name"] + + # parse each hit + hit_list = [] + for hit_new in self._parse_hit( + elem.find(self.NS + "matches"), query_id, query_seq + ): + # interproscan results contain duplicate hits rather than + # a single hit with 
multiple hsps. In this case the hsps + # of a duplicate hit will be appended to the already + # existing hit + for hit in hit_list: + if hit.id == hit_new.id: + for hsp in hit_new.hsps: + hit.hsps.append(hsp) + break + else: + hit_list.append(hit_new) + + # create qresult and assign attributes + qresult = QueryResult(hit_list, query_id) + setattr(qresult, "description", query_desc) + for key, value in self._meta.items(): + setattr(qresult, key, value) + yield qresult + + def _parse_hit(self, root_hit_elem, query_id, query_seq=None): + """Parse hit (PRIVATE).""" + # feed the loop below an empty list so iteration still works + if root_hit_elem is None: + root_hit_elem = [] + + for hit_elem in root_hit_elem: + # store the match/location type + hit_type = re.sub(r"%s(\w+)-match" % self.NS, r"\1", hit_elem.find(".").tag) + # store the hit id + signature = hit_elem.find(self.NS + "signature") + hit_id = signature.attrib["ac"] + + # store xrefs and alt_descs + xrefs = self._parse_xrefs(signature.find(self.NS + "entry")) + + # parse each hsp + hsps = list( + self._parse_hsp( + hit_elem.find(self.NS + "locations"), query_id, hit_id, query_seq + ) + ) + + # create hit and assign attributes + hit = Hit(hsps, hit_id) + setattr(hit, "dbxrefs", xrefs) + for key, (attr, caster) in _ELEM_HIT.items(): + value = signature.attrib.get(key) + if value is not None: + setattr(hit, attr, caster(value)) + # format specific attributes + hit.attributes["Hit type"] = hit_type + signature_lib = signature.find(self.NS + "signature-library-release") + hit.attributes["Target"] = str(signature_lib.attrib.get("library")) + hit.attributes["Target version"] = str(signature_lib.attrib.get("version")) + + yield hit + + def _parse_hsp(self, root_hsp_elem, query_id, hit_id, query_seq=None): + """Parse hsp (PRIVATE).""" + # feed the loop below an empty list so iteration still works + if root_hsp_elem is None: + root_hsp_elem = [] + + for hsp_elem in root_hsp_elem: + # create frag and assign attributes + frag = HSPFragment(hit_id, query_id) + setattr(frag, "molecule_type", "protein") + if query_seq is not None: + setattr(frag, "query", query_seq) + for key, (attr, caster) in _ELEM_FRAG.items(): + value = hsp_elem.attrib.get(key) + if value is not None: + # start should be 0-based + if attr.endswith("start"): + value = caster(value) - 1 + # store query start and end to calculate aln_span + if attr == "query_start": + start = int(value) + if attr == "query_end": + end = int(value) + setattr(frag, attr, caster(value)) + # calculate aln_span and store + setattr(frag, "aln_span", end - start) + + # create hsp and assign attributes + hsp = HSP([frag]) + setattr(hsp, "query_id", query_id) + setattr(hsp, "hit_id", hit_id) + for key, (attr, caster) in _ELEM_HSP.items(): + value = hsp_elem.attrib.get(key) + if value is not None: + setattr(hsp, attr, caster(value)) + yield hsp + + def _parse_xrefs(self, root_entry_elem): + """Parse xrefs (PRIVATE).""" + xrefs = [] + # store entry id and description + if root_entry_elem is not None: + xrefs.append("IPR:" + root_entry_elem.attrib["ac"]) + + # store go-xrefs and pathway-refs id and description + if root_entry_elem is not None: + xref_elems = [] + xref_elems = xref_elems + root_entry_elem.findall(self.NS + "go-xref") + xref_elems = xref_elems + root_entry_elem.findall(self.NS + "pathway-xref") + + for entry in xref_elems: + xref = entry.attrib["id"] + if ":" not in xref: + xref = entry.attrib["db"] + ":" + xref + xrefs.append(xref) + return xrefs + + +# if not used as a module, run the doctest 
+if __name__ == "__main__": + from Bio._utils import run_doctest + + run_doctest() diff --git a/code/lib/Bio/SearchIO/__init__.py b/code/lib/Bio/SearchIO/__init__.py new file mode 100644 index 0000000..1b0084a --- /dev/null +++ b/code/lib/Bio/SearchIO/__init__.py @@ -0,0 +1,684 @@ +# Copyright 2012 by Wibowo Arindrarto. All rights reserved. +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. + +"""Biopython interface for sequence search program outputs. + +The SearchIO submodule provides parsers, indexers, and writers for outputs from +various sequence search programs. It provides an API similar to SeqIO and +AlignIO, with the following main functions: ``parse``, ``read``, ``to_dict``, +``index``, ``index_db``, ``write``, and ``convert``. + +SearchIO parses a search output file's contents into a hierarchy of four nested +objects: QueryResult, Hit, HSP, and HSPFragment. Each of them models a part of +the search output file: + + - QueryResult represents a search query. This is the main object returned + by the input functions and it contains all other objects. + - Hit represents a database hit, + - HSP represents high-scoring alignment region(s) in the hit, + - HSPFragment represents a contiguous alignment within the HSP + +In addition to the four objects above, SearchIO is also tightly integrated with +the SeqRecord objects (see SeqIO) and MultipleSeqAlignment objects (see +AlignIO). SeqRecord objects are used to store the actual matching hit and query +sequences, while MultipleSeqAlignment objects stores the alignment between them. + +A detailed description of these objects' features and their example usages are +available in their respective documentations. + + +Input +===== +The main function for parsing search output files is Bio.SearchIO.parse(...). +This function parses a given search output file and returns a generator object +that yields one QueryResult object per iteration. + +``parse`` takes two arguments: 1) a file handle or a filename of the input file +(the search output file) and 2) the format name. + + >>> from Bio import SearchIO + >>> for qresult in SearchIO.parse('Blast/mirna.xml', 'blast-xml'): + ... print("%s %s" % (qresult.id, qresult.description)) + ... + 33211 mir_1 + 33212 mir_2 + 33213 mir_3 + +SearchIO also provides the Bio.SearchIO.read(...) function, which is intended +for use on search output files containing only one query. ``read`` returns one +QueryResult object and will raise an exception if the source file contains more +than one queries: + + >>> qresult = SearchIO.read('Blast/xml_2226_blastp_004.xml', 'blast-xml') + >>> print("%s %s" % (qresult.id, qresult.description)) + ... + gi|11464971:4-101 pleckstrin [Mus musculus] + + >>> SearchIO.read('Blast/mirna.xml', 'blast-xml') + Traceback (most recent call last): + ... + ValueError: ... + +For accessing search results of large output files, you may use the indexing +functions Bio.SearchIO.index(...) or Bio.SearchIO.index_db(...). They have a +similar interface to their counterparts in SeqIO and AlignIO, with the addition +of optional, format-specific keyword arguments. + + +Output +====== +SearchIO has writing support for several formats, accessible from the +Bio.SearchIO.write(...) function. 
This function returns a tuple of four
+numbers: the number of QueryResult, Hit, HSP, and HSPFragment objects
+written::
+
+    qresults = SearchIO.parse('Blast/mirna.xml', 'blast-xml')
+    SearchIO.write(qresults, 'results.tab', 'blast-tab')
+    (3, 239, 277, 277)
+
+Note that different writers may require different attribute values of the
+SearchIO objects. This limits the scope of writable search results to search
+results possessing the required attributes.
+
+For example, the writer for HMMER domain table output requires
+the conditional e-value attribute from each HSP object, among others. If you
+try to write to the HMMER domain table format and your HSPs do not have this
+attribute, an exception will be raised.
+
+
+Conversion
+==========
+SearchIO provides a shortcut function Bio.SearchIO.convert(...) to convert a
+given file into another format. Under the hood, ``convert`` simply parses a given
+output file and writes it to another using the ``parse`` and ``write`` functions.
+
+Note that the same restrictions found in Bio.SearchIO.write(...) apply to the
+convert function as well.
+
+
+Conventions
+===========
+The main goal of creating SearchIO is to have a common, easy-to-use interface
+across different search output files. As such, we have also created some
+conventions / standards for SearchIO that extend beyond the common object model.
+These conventions apply to all files parsed by SearchIO, regardless of their
+individual formats.
+
+Python-style sequence coordinates
+---------------------------------
+
+When storing sequence coordinates (start and end values), SearchIO uses
+the Python-style slice convention: zero-based and half-open intervals. For
+example, if in a BLAST XML output file the start and end coordinates of an
+HSP are 10 and 28, they would become 9 and 28 in SearchIO. The start
+coordinate becomes 9 because Python indices start from zero, while the end
+coordinate remains 28 as Python slices omit the last item in an interval.
+
+Besides giving you the benefits of standardization, this convention also
+makes the coordinates usable for slicing sequences. For example, given a
+full query sequence and the start and end coordinates of an HSP, one can
+use the coordinates to extract part of the query sequence that results in
+the database hit.
+
+When these objects are written to an output file using
+SearchIO.write(...), the coordinate values are restored to their
+respective format's convention. Using the example above, if the HSP would
+be written to an XML file, the start and end coordinates would become 10
+and 28 again.
+
+Sequence coordinate order
+-------------------------
+
+Some search output formats reverse the start and end coordinates
+according to the sequence's strand. For example, in BLAST plain text
+format, if the matching strand lies in the minus orientation, then the
+start coordinate will always be bigger than the end coordinate.
+
+In SearchIO, start coordinates are always smaller than the end
+coordinates, regardless of their originating strand. This ensures
+consistency when using the coordinates to slice full sequences.
+
+Note that this coordinate order convention is only enforced at the
+HSPFragment level. If an HSP object has several HSPFragment objects, each
+individual fragment will conform to this convention. But the order of the
+fragments within the HSP object follows what the search output file uses.
+
+Similar to the coordinate style convention, the start and end coordinates'
+order is restored to their respective formats when the objects are
+written using Bio.SearchIO.write(...).
+
+Frames and strand values
+------------------------
+
+SearchIO only allows -1, 0, 1 and None as strand values. For frames, the
+only allowed values are integers from -3 to 3 (inclusive) and None. Both
+of these are standard Biopython conventions.
+
+
+Supported Formats
+=================
+Below is a list of search program output formats supported by SearchIO.
+
+Support for parsing, indexing, and writing:
+
+ - blast-tab      - BLAST+ tabular output. Both variants without comments
+                    (-m 6 flag) and with comments (-m 7 flag) are supported.
+ - blast-xml      - BLAST+ XML output.
+ - blat-psl       - The default output of BLAT (PSL format). Variants with or
+                    without header are both supported. PSLX (PSL + sequences)
+                    is also supported.
+ - hmmer3-tab     - HMMER3 table output.
+ - hmmer3-domtab  - HMMER3 domain table output. When using this format, the
+                    program name has to be specified. For example, for parsing
+                    hmmscan output, the name would be 'hmmscan3-domtab'.
+
+Support for parsing and indexing:
+
+ - exonerate-text   - Exonerate plain text output.
+ - exonerate-vulgar - Exonerate vulgar line.
+ - exonerate-cigar  - Exonerate cigar line.
+ - fasta-m10        - Bill Pearson's FASTA -m 10 output.
+ - hmmer3-text      - HMMER3 regular text output format. Supported HMMER3
+                      subprograms are hmmscan, hmmsearch, and phmmer.
+ - hmmer2-text      - HMMER2 regular text output format. Supported HMMER2
+                      subprograms are hmmpfam, hmmsearch.
+
+Support for parsing:
+
+ - blast-text    - BLAST+ plain text output.
+ - hhsuite2-text - HHSUITE plain text output.
+
+Each of these formats has different keyword arguments available for use with
+the main SearchIO functions. More details and examples are available in each
+format's documentation.
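+
+As a rough illustration of the coordinate conventions above (a sketch only,
+not one of the official examples; the file name and the full-length query
+sequence are hypothetical)::
+
+    from Bio import SearchIO
+
+    qresult = SearchIO.read('my_blast.xml', 'blast-xml')
+    hsp = qresult[0][0]  # first HSP of the first hit
+    # query_start/query_end are zero-based and half-open, with start < end,
+    # so they can be used directly to slice the full query sequence:
+    matched = full_query_seq[hsp.query_start:hsp.query_end]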
+
+"""
+
+from Bio.File import as_handle
+from Bio.SearchIO._model import QueryResult, Hit, HSP, HSPFragment
+from Bio.SearchIO._utils import get_processor
+
+
+__all__ = ("read", "parse", "to_dict", "index", "index_db", "write", "convert")
+
+
+# dictionary of supported formats for parse() and read()
+_ITERATOR_MAP = {
+    "blast-tab": ("BlastIO", "BlastTabParser"),
+    "blast-text": ("BlastIO", "BlastTextParser"),
+    "blast-xml": ("BlastIO", "BlastXmlParser"),
+    "blat-psl": ("BlatIO", "BlatPslParser"),
+    "exonerate-cigar": ("ExonerateIO", "ExonerateCigarParser"),
+    "exonerate-text": ("ExonerateIO", "ExonerateTextParser"),
+    "exonerate-vulgar": ("ExonerateIO", "ExonerateVulgarParser"),
+    "fasta-m10": ("FastaIO", "FastaM10Parser"),
+    "hhsuite2-text": ("HHsuiteIO", "Hhsuite2TextParser"),
+    "hhsuite3-text": ("HHsuiteIO", "Hhsuite2TextParser"),
+    "hmmer2-text": ("HmmerIO", "Hmmer2TextParser"),
+    "hmmer3-text": ("HmmerIO", "Hmmer3TextParser"),
+    "hmmer3-tab": ("HmmerIO", "Hmmer3TabParser"),
+    # for hmmer3-domtab, the specific program is part of the format name
+    # as we need it to distinguish hit / target coordinates
+    "hmmscan3-domtab": ("HmmerIO", "Hmmer3DomtabHmmhitParser"),
+    "hmmsearch3-domtab": ("HmmerIO", "Hmmer3DomtabHmmqueryParser"),
+    "interproscan-xml": ("InterproscanIO", "InterproscanXmlParser"),
+    "phmmer3-domtab": ("HmmerIO", "Hmmer3DomtabHmmqueryParser"),
+}
+
+# dictionary of supported formats for index()
+_INDEXER_MAP = {
+    "blast-tab": ("BlastIO", "BlastTabIndexer"),
+    "blast-xml": ("BlastIO", "BlastXmlIndexer"),
+    "blat-psl": ("BlatIO", "BlatPslIndexer"),
+    "exonerate-cigar": ("ExonerateIO", "ExonerateCigarIndexer"),
+    "exonerate-text": ("ExonerateIO", "ExonerateTextIndexer"),
+    "exonerate-vulgar": ("ExonerateIO", "ExonerateVulgarIndexer"),
+    "fasta-m10": ("FastaIO", "FastaM10Indexer"),
+    "hmmer2-text": ("HmmerIO", "Hmmer2TextIndexer"),
+    "hmmer3-text": ("HmmerIO", "Hmmer3TextIndexer"),
+    "hmmer3-tab": ("HmmerIO", "Hmmer3TabIndexer"),
+    "hmmscan3-domtab": ("HmmerIO", "Hmmer3DomtabHmmhitIndexer"),
+    "hmmsearch3-domtab": ("HmmerIO", "Hmmer3DomtabHmmqueryIndexer"),
+    "phmmer3-domtab": ("HmmerIO", "Hmmer3DomtabHmmqueryIndexer"),
+}
+
+# dictionary of supported formats for write()
+_WRITER_MAP = {
+    "blast-tab": ("BlastIO", "BlastTabWriter"),
+    "blast-xml": ("BlastIO", "BlastXmlWriter"),
+    "blat-psl": ("BlatIO", "BlatPslWriter"),
+    "hmmer3-tab": ("HmmerIO", "Hmmer3TabWriter"),
+    "hmmscan3-domtab": ("HmmerIO", "Hmmer3DomtabHmmhitWriter"),
+    "hmmsearch3-domtab": ("HmmerIO", "Hmmer3DomtabHmmqueryWriter"),
+    "phmmer3-domtab": ("HmmerIO", "Hmmer3DomtabHmmqueryWriter"),
+}
+
+
+def parse(handle, format=None, **kwargs):
+    """Iterate over search tool output file as QueryResult objects.
+
+    Arguments:
+     - handle - Handle to the file, or the filename as a string.
+     - format - Lower case string denoting one of the supported formats.
+     - kwargs - Format-specific keyword arguments.
+
+    This function is used to iterate over each query in a given search output
+    file:
+
+    >>> from Bio import SearchIO
+    >>> qresults = SearchIO.parse('Blast/mirna.xml', 'blast-xml')
+    >>> qresults
+    <generator object ...>
+    >>> for qresult in qresults:
+    ...     print("Search %s has %i hits" % (qresult.id, len(qresult)))
+    ...
+    Search 33211 has 100 hits
+    Search 33212 has 44 hits
+    Search 33213 has 95 hits
+
+    Depending on the file format, ``parse`` may also accept additional keyword
+    argument(s) that modify the behavior of the format parser.
Here is a + simple example, where the keyword argument enables parsing of a commented + BLAST tabular output file: + + >>> from Bio import SearchIO + >>> for qresult in SearchIO.parse('Blast/mirna.tab', 'blast-tab', comments=True): + ... print("Search %s has %i hits" % (qresult.id, len(qresult))) + ... + Search 33211 has 100 hits + Search 33212 has 44 hits + Search 33213 has 95 hits + + """ + # get the iterator object and do error checking + iterator = get_processor(format, _ITERATOR_MAP) + + # HACK: force BLAST XML decoding to use utf-8 + handle_kwargs = {} + if format == "blast-xml": + handle_kwargs["encoding"] = "utf-8" + + # and start iterating + with as_handle(handle, **handle_kwargs) as source_file: + generator = iterator(source_file, **kwargs) + yield from generator + + +def read(handle, format=None, **kwargs): + """Turn a search output file containing one query into a single QueryResult. + + - handle - Handle to the file, or the filename as a string. + - format - Lower case string denoting one of the supported formats. + - kwargs - Format-specific keyword arguments. + + ``read`` is used for parsing search output files containing exactly one query: + + >>> from Bio import SearchIO + >>> qresult = SearchIO.read('Blast/xml_2226_blastp_004.xml', 'blast-xml') + >>> print("%s %s" % (qresult.id, qresult.description)) + ... + gi|11464971:4-101 pleckstrin [Mus musculus] + + If the given handle has no results, an exception will be raised: + + >>> from Bio import SearchIO + >>> qresult = SearchIO.read('Blast/tab_2226_tblastn_002.txt', 'blast-tab') + Traceback (most recent call last): + ... + ValueError: No query results found in handle + + Similarly, if the given handle has more than one results, an exception will + also be raised: + + >>> from Bio import SearchIO + >>> qresult = SearchIO.read('Blast/tab_2226_tblastn_001.txt', 'blast-tab') + Traceback (most recent call last): + ... + ValueError: More than one query results found in handle + + Like ``parse``, ``read`` may also accept keyword argument(s) depending on the + search output file format. + + """ + query_results = parse(handle, format, **kwargs) + + try: + query_result = next(query_results) + except StopIteration: + raise ValueError("No query results found in handle") from None + try: + next(query_results) + raise ValueError("More than one query results found in handle") + except StopIteration: + pass + + return query_result + + +def to_dict(qresults, key_function=None): + """Turn a QueryResult iterator or list into a dictionary. + + - qresults - Iterable returning QueryResult objects. + - key_function - Optional callback function which when given a + QueryResult object should return a unique key for the + dictionary. Defaults to using .id of the result. + + This function enables access of QueryResult objects from a single search + output file using its identifier. + + >>> from Bio import SearchIO + >>> qresults = SearchIO.parse('Blast/wnts.xml', 'blast-xml') + >>> search_dict = SearchIO.to_dict(qresults) + >>> list(search_dict) + ['gi|195230749:301-1383', 'gi|325053704:108-1166', ..., 'gi|53729353:216-1313'] + >>> search_dict['gi|156630997:105-1160'] + QueryResult(id='gi|156630997:105-1160', 5 hits) + + By default, the dictionary key is the QueryResult's string ID. This may be + changed by supplying a callback function that returns the desired identifier. + Here is an example using a function that removes the 'gi|' part in the + beginning of the QueryResult ID. 
+ + >>> from Bio import SearchIO + >>> qresults = SearchIO.parse('Blast/wnts.xml', 'blast-xml') + >>> key_func = lambda qresult: qresult.id.split('|')[1] + >>> search_dict = SearchIO.to_dict(qresults, key_func) + >>> list(search_dict) + ['195230749:301-1383', '325053704:108-1166', ..., '53729353:216-1313'] + >>> search_dict['156630997:105-1160'] + QueryResult(id='gi|156630997:105-1160', 5 hits) + + Note that the callback function does not change the QueryResult's ID value. + It only changes the key value used to retrieve the associated QueryResult. + + As this function loads all QueryResult objects into memory, it may be + unsuitable for dealing with files containing many queries. In that case, it + is recommended that you use either ``index`` or ``index_db``. + + Since Python 3.7, the default dict class maintains key order, meaning + this dictionary will reflect the order of records given to it. For + CPython and PyPy, this was already implemented for Python 3.6, so + effectively you can always assume the record order is preserved. + """ + + def _default_key_function(rec): + return rec.id + + if key_function is None: + key_function = _default_key_function + + qdict = {} + for qresult in qresults: + key = key_function(qresult) + if key in qdict: + raise ValueError("Duplicate key %r" % key) + qdict[key] = qresult + return qdict + + +def index(filename, format=None, key_function=None, **kwargs): + """Indexes a search output file and returns a dictionary-like object. + + - filename - string giving name of file to be indexed + - format - Lower case string denoting one of the supported formats. + - key_function - Optional callback function which when given a + QueryResult should return a unique key for the dictionary. + - kwargs - Format-specific keyword arguments. + + Index returns a pseudo-dictionary object with QueryResult objects as its + values and a string identifier as its keys. The function is mainly useful + for dealing with large search output files, as it enables access to any + given QueryResult object much faster than using parse or read. + + Index works by storing in-memory the start locations of all queries in a + file. When a user requested access to the query, this function will jump + to its start position, parse the whole query, and return it as a + QueryResult object: + + >>> from Bio import SearchIO + >>> search_idx = SearchIO.index('Blast/wnts.xml', 'blast-xml') + >>> search_idx + SearchIO.index('Blast/wnts.xml', 'blast-xml', key_function=None) + >>> sorted(search_idx) + ['gi|156630997:105-1160', 'gi|195230749:301-1383', ..., 'gi|53729353:216-1313'] + >>> search_idx['gi|195230749:301-1383'] + QueryResult(id='gi|195230749:301-1383', 5 hits) + >>> search_idx.close() + + If the file is BGZF compressed, this is detected automatically. Ordinary + GZIP files are not supported: + + >>> from Bio import SearchIO + >>> search_idx = SearchIO.index('Blast/wnts.xml.bgz', 'blast-xml') + >>> search_idx + SearchIO.index('Blast/wnts.xml.bgz', 'blast-xml', key_function=None) + >>> search_idx['gi|195230749:301-1383'] + QueryResult(id='gi|195230749:301-1383', 5 hits) + >>> search_idx.close() + + You can supply a custom callback function to alter the default identifier + string. This function should accept as its input the QueryResult ID string + and return a modified version of it. 
+
+    >>> from Bio import SearchIO
+    >>> key_func = lambda id: id.split('|')[1]
+    >>> search_idx = SearchIO.index('Blast/wnts.xml', 'blast-xml', key_func)
+    >>> search_idx
+    SearchIO.index('Blast/wnts.xml', 'blast-xml', key_function=<function <lambda> at ...>)
+    >>> sorted(search_idx)
+    ['156630997:105-1160', ..., '371502086:108-1205', '53729353:216-1313']
+    >>> search_idx['156630997:105-1160']
+    QueryResult(id='gi|156630997:105-1160', 5 hits)
+    >>> search_idx.close()
+
+    Note that the callback function does not change the QueryResult's ID value.
+    It only changes the key value used to retrieve the associated QueryResult.
+
+    """
+    if not isinstance(filename, str):
+        raise TypeError("Need a filename (not a handle)")
+
+    from Bio.File import _IndexedSeqFileDict
+
+    proxy_class = get_processor(format, _INDEXER_MAP)
+    repr = "SearchIO.index(%r, %r, key_function=%r)" % (filename, format, key_function)
+    return _IndexedSeqFileDict(
+        proxy_class(filename, **kwargs), key_function, repr, "QueryResult"
+    )
+
+
+def index_db(index_filename, filenames=None, format=None, key_function=None, **kwargs):
+    """Indexes several search output files into an SQLite database.
+
+     - index_filename - The SQLite filename.
+     - filenames - List of strings specifying file(s) to be indexed, or when
+                   indexing a single file this can be given as a string.
+                   (optional if reloading an existing index, but must match)
+     - format - Lower case string denoting one of the supported formats.
+                (optional if reloading an existing index, but must match)
+     - key_function - Optional callback function which when given a
+                      QueryResult identifier string should return a unique
+                      key for the dictionary.
+     - kwargs - Format-specific keyword arguments.
+
+    The ``index_db`` function is similar to ``index`` in that it indexes the start
+    position of all queries from search output files. The main difference is
+    instead of storing these indices in-memory, they are written to disk as an
+    SQLite database file. This allows the indices to persist between Python
+    sessions. This enables access to any queries in the file without any
+    indexing overhead, provided it has been indexed at least once.
+
+    >>> from Bio import SearchIO
+    >>> idx_filename = ":memory:" # Use a real filename, this is in RAM only!
+    >>> db_idx = SearchIO.index_db(idx_filename, 'Blast/mirna.xml', 'blast-xml')
+    >>> sorted(db_idx)
+    ['33211', '33212', '33213']
+    >>> db_idx['33212']
+    QueryResult(id='33212', 44 hits)
+    >>> db_idx.close()
+
+    ``index_db`` can also index multiple files and store them in the same
+    database, making it easier to group multiple search files and access them
+    from a single interface.
+
+    >>> from Bio import SearchIO
+    >>> idx_filename = ":memory:" # Use a real filename, this is in RAM only!
+    >>> files = ['Blast/mirna.xml', 'Blast/wnts.xml']
+    >>> db_idx = SearchIO.index_db(idx_filename, files, 'blast-xml')
+    >>> sorted(db_idx)
+    ['33211', '33212', '33213', 'gi|156630997:105-1160', ..., 'gi|53729353:216-1313']
+    >>> db_idx['33212']
+    QueryResult(id='33212', 44 hits)
+    >>> db_idx.close()
+
+    One common example where this is helpful is if you had a large set of
+    query sequences (say ten thousand) which you split into ten query files
+    of one thousand sequences each in order to run as ten separate BLAST jobs
+    on a cluster. You could use ``index_db`` to index the ten BLAST output
+    files together for seamless access to all the results as one dictionary.
+
+    Note that ':memory:' rather than an index filename tells SQLite to hold
+    the index database in memory.
This is useful for quick tests, but using + the Bio.SearchIO.index(...) function instead would use less memory. + + BGZF compressed files are supported, and detected automatically. Ordinary + GZIP compressed files are not supported. + + See also Bio.SearchIO.index(), Bio.SearchIO.to_dict(), and the Python module + glob which is useful for building lists of files. + """ + # cast filenames to list if it's a string + # (can we check if it's a string or a generator?) + if isinstance(filenames, str): + filenames = [filenames] + + from Bio.File import _SQLiteManySeqFilesDict + + repr = "SearchIO.index_db(%r, filenames=%r, format=%r, key_function=%r, ...)" % ( + index_filename, + filenames, + format, + key_function, + ) + + def proxy_factory(format, filename=None): + """Given a filename returns proxy object, else boolean if format OK.""" + if filename: + return get_processor(format, _INDEXER_MAP)(filename, **kwargs) + else: + return format in _INDEXER_MAP + + return _SQLiteManySeqFilesDict( + index_filename, filenames, proxy_factory, format, key_function, repr + ) + + +def write(qresults, handle, format=None, **kwargs): + """Write QueryResult objects to a file in the given format. + + - qresults - An iterator returning QueryResult objects or a single + QueryResult object. + - handle - Handle to the file, or the filename as a string. + - format - Lower case string denoting one of the supported formats. + - kwargs - Format-specific keyword arguments. + + The ``write`` function writes QueryResult object(s) into the given output + handle / filename. You can supply it with a single QueryResult object or an + iterable returning one or more QueryResult objects. In both cases, the + function will return a tuple of four values: the number of QueryResult, Hit, + HSP, and HSPFragment objects it writes to the output file:: + + from Bio import SearchIO + qresults = SearchIO.parse('Blast/mirna.xml', 'blast-xml') + SearchIO.write(qresults, 'results.tab', 'blast-tab') + (3, 239, 277, 277) + + The output of different formats may be adjusted using the format-specific + keyword arguments. Here is an example that writes BLAT PSL output file with + a header:: + + from Bio import SearchIO + qresults = SearchIO.parse('Blat/psl_34_001.psl', 'blat-psl') + SearchIO.write(qresults, 'results.tab', 'blat-psl', header=True) + (2, 13, 22, 26) + + """ + # turn qresults into an iterator if it's a single QueryResult object + if isinstance(qresults, QueryResult): + qresults = iter([qresults]) + else: + qresults = iter(qresults) + + # get the writer object and do error checking + writer_class = get_processor(format, _WRITER_MAP) + + # write to the handle + with as_handle(handle, "w") as target_file: + writer = writer_class(target_file, **kwargs) + # count how many qresults, hits, and hsps + qresult_count, hit_count, hsp_count, frag_count = writer.write_file(qresults) + + return qresult_count, hit_count, hsp_count, frag_count + + +def convert(in_file, in_format, out_file, out_format, in_kwargs=None, out_kwargs=None): + """Convert between two search output formats, return number of records. + + - in_file - Handle to the input file, or the filename as string. + - in_format - Lower case string denoting the format of the input file. + - out_file - Handle to the output file, or the filename as string. + - out_format - Lower case string denoting the format of the output file. + - in_kwargs - Dictionary of keyword arguments for the input function. + - out_kwargs - Dictionary of keyword arguments for the output function. 
+ + The convert function is a shortcut function for ``parse`` and ``write``. It has + the same return type as ``write``. Format-specific arguments may be passed to + the convert function, but only as dictionaries. + + Here is an example of using ``convert`` to convert from a BLAST+ XML file + into a tabular file with comments:: + + from Bio import SearchIO + in_file = 'Blast/mirna.xml' + in_fmt = 'blast-xml' + out_file = 'results.tab' + out_fmt = 'blast-tab' + out_kwarg = {'comments': True} + SearchIO.convert(in_file, in_fmt, out_file, out_fmt, out_kwargs=out_kwarg) + (3, 239, 277, 277) + + Given that different search output file provide different statistics and + different level of details, the convert function is limited only to + converting formats that have the same statistics and for conversion to + formats with the same level of detail, or less. + + For example, converting from a BLAST+ XML output to a HMMER table file + is not possible, as these are two search programs with different kinds of + statistics. In theory, you may provide the necessary values required by the + HMMER table file (e.g. conditional e-values, envelope coordinates, etc). + However, these values are likely to hold little meaning as they are not true + HMMER-computed values. + + Another example is converting from BLAST+ XML to BLAST+ tabular file. This + is possible, as BLAST+ XML provide all the values necessary to create a + BLAST+ tabular file. However, the reverse conversion may not be possible. + There are more details covered in the XML file that are not found in a + tabular file (e.g. the lambda and kappa values) + + """ + if in_kwargs is None: + in_kwargs = {} + if out_kwargs is None: + out_kwargs = {} + + qresults = parse(in_file, in_format, **in_kwargs) + return write(qresults, out_file, out_format, **out_kwargs) + + +# if not used as a module, run the doctest +if __name__ == "__main__": + from Bio._utils import run_doctest + + run_doctest() diff --git a/code/lib/Bio/SearchIO/__pycache__/BlatIO.cpython-37.pyc b/code/lib/Bio/SearchIO/__pycache__/BlatIO.cpython-37.pyc new file mode 100644 index 0000000..bfed650 Binary files /dev/null and b/code/lib/Bio/SearchIO/__pycache__/BlatIO.cpython-37.pyc differ diff --git a/code/lib/Bio/SearchIO/__pycache__/FastaIO.cpython-37.pyc b/code/lib/Bio/SearchIO/__pycache__/FastaIO.cpython-37.pyc new file mode 100644 index 0000000..fdb3870 Binary files /dev/null and b/code/lib/Bio/SearchIO/__pycache__/FastaIO.cpython-37.pyc differ diff --git a/code/lib/Bio/SearchIO/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/SearchIO/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..1f04b72 Binary files /dev/null and b/code/lib/Bio/SearchIO/__pycache__/__init__.cpython-37.pyc differ diff --git a/code/lib/Bio/SearchIO/__pycache__/_index.cpython-37.pyc b/code/lib/Bio/SearchIO/__pycache__/_index.cpython-37.pyc new file mode 100644 index 0000000..9d3448a Binary files /dev/null and b/code/lib/Bio/SearchIO/__pycache__/_index.cpython-37.pyc differ diff --git a/code/lib/Bio/SearchIO/__pycache__/_utils.cpython-37.pyc b/code/lib/Bio/SearchIO/__pycache__/_utils.cpython-37.pyc new file mode 100644 index 0000000..20cf9af Binary files /dev/null and b/code/lib/Bio/SearchIO/__pycache__/_utils.cpython-37.pyc differ diff --git a/code/lib/Bio/SearchIO/_index.py b/code/lib/Bio/SearchIO/_index.py new file mode 100644 index 0000000..0fa5b04 --- /dev/null +++ b/code/lib/Bio/SearchIO/_index.py @@ -0,0 +1,34 @@ +# Copyright 2012 by Wibowo Arindrarto. All rights reserved. 
+# Revisions copyright 2012-2016 by Peter Cock. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. + +"""Custom indexing for Bio.SearchIO objects.""" + +from io import StringIO + +from Bio.File import _IndexedSeqFileProxy, _open_for_random_access + + +class SearchIndexer(_IndexedSeqFileProxy): + """Base class for file format specific random access. + + Subclasses for each file format should define '_parser' and optionally + 'get_raw' methods. + """ + + def __init__(self, filename, **kwargs): + """Initialize the class.""" + self._handle = _open_for_random_access(filename) + self._kwargs = kwargs + + def _parse(self, handle): + """Pass handle and arguments to the next iterable (PRIVATE).""" + return next(iter(self._parser(handle, **self._kwargs))) + + def get(self, offset): + """Get offset and convert it from bytes to string.""" + return self._parse(StringIO(self.get_raw(offset).decode())) diff --git a/code/lib/Bio/SearchIO/_legacy/NCBIStandalone.py b/code/lib/Bio/SearchIO/_legacy/NCBIStandalone.py new file mode 100644 index 0000000..378da39 --- /dev/null +++ b/code/lib/Bio/SearchIO/_legacy/NCBIStandalone.py @@ -0,0 +1,1953 @@ +# Copyright 1999-2000 by Jeffrey Chang. All rights reserved. +# This code is part of the Biopython distribution and governed by its +# license. Please see the LICENSE file that should have been included +# as part of this package. +# Patches by Mike Poidinger to support multiple databases. +# Updated by Peter Cock in 2007 to do a better job on BLAST 2.2.15 + +"""Code for calling standalone BLAST and parsing plain text output (DEPRECATED). + +Rather than parsing the human readable plain text BLAST output (which seems to +change with every update to BLAST), we and the NBCI recommend you parse the +XML output instead. The plain text parser in this module still works at the +time of writing, but is considered obsolete and updating it to cope with the +latest versions of BLAST is not a priority for us. + +This module also provides code to work with the "legacy" standalone version of +NCBI BLAST, tools blastall, rpsblast and blastpgp via three helper functions of +the same name. These functions are very limited for dealing with the output as +files rather than handles, for which the wrappers in Bio.Blast.Applications are +preferred. Furthermore, the NCBI themselves regard these command line tools as +"legacy", and encourage using the new BLAST+ tools instead. Biopython has +wrappers for these under Bio.Blast.Applications (see the tutorial). +""" + +import re + +from io import StringIO +from Bio.SearchIO._legacy.ParserSupport import ( + UndoHandle, + AbstractParser, + AbstractConsumer, + read_and_call, + read_and_call_until, + read_and_call_while, + attempt_read_and_call, + is_blank_line, + safe_peekline, + safe_readline, +) +from Bio.Blast import Record + +from Bio import BiopythonWarning +import warnings + +_score_e_re = re.compile(r"Score +E") + + +class LowQualityBlastError(Exception): + """Error caused by running a low quality sequence through BLAST. + + When low quality sequences (like GenBank entries containing only + stretches of a single nucleotide) are BLASTed, they will result in + BLAST generating an error and not being able to perform the BLAST. + search. This error should be raised for the BLAST reports produced + in this case. 
+ """ + + pass + + +class ShortQueryBlastError(Exception): + """Error caused by running a short query sequence through BLAST. + + If the query sequence is too short, BLAST outputs warnings and errors:: + + Searching[blastall] WARNING: [000.000] AT1G08320: SetUpBlastSearch failed. + [blastall] ERROR: [000.000] AT1G08320: Blast: + [blastall] ERROR: [000.000] AT1G08320: Blast: Query must be at least wordsize + done + + This exception is raised when that condition is detected. + """ + + pass + + +class _Scanner: + """Scan BLAST output from blastall or blastpgp. + + Tested with blastall and blastpgp v2.0.10, v2.0.11 + + Methods: + - feed Feed data into the scanner. + + """ + + def __init__(self): + """Raise warning that this module is outdated.""" + warnings.warn( + "Parsing BLAST plain text output file is not a well supported" + " functionality anymore. Consider generating your BLAST output for parsing" + " as XML or tabular format instead.", + BiopythonWarning, + ) + + def feed(self, handle, consumer): + """Feed in a BLAST report for scanning. + + Arguments: + - handle is a file-like object that contains the BLAST report. + - consumer is a Consumer object that will receive events as the + report is scanned. + """ + if isinstance(handle, UndoHandle): + uhandle = handle + else: + uhandle = UndoHandle(handle) + + # Try to fast-forward to the beginning of the blast report. + read_and_call_until(uhandle, consumer.noevent, contains="BLAST") + # Now scan the BLAST report. + self._scan_header(uhandle, consumer) + self._scan_rounds(uhandle, consumer) + self._scan_database_report(uhandle, consumer) + self._scan_parameters(uhandle, consumer) + + def _scan_header(self, uhandle, consumer): + # BLASTP 2.0.10 [Aug-26-1999] + # + # + # Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaf + # Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), + # "Gapped BLAST and PSI-BLAST: a new generation of protein database sea + # programs", Nucleic Acids Res. 25:3389-3402. + # + # Query= test + # (140 letters) + # + # Database: sdqib40-1.35.seg.fa + # 1323 sequences; 223,339 total letters + # + # ======================================================== + # This next example is from the online version of Blast, + # note there are TWO references, an RID line, and also + # the database is BEFORE the query line. + # Note there possibleuse of non-ASCII in the author names. + # ======================================================== + # + # BLASTP 2.2.15 [Oct-15-2006] + # Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Sch??ffer, + # Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman + # (1997), "Gapped BLAST and PSI-BLAST: a new generation of + # protein database search programs", Nucleic Acids Res. 25:3389-3402. + # + # Reference: Sch??ffer, Alejandro A., L. Aravind, Thomas L. Madden, Sergei + # Shavirin, John L. Spouge, Yuri I. Wolf, Eugene V. Koonin, and + # Stephen F. Altschul (2001), "Improving the accuracy of PSI-BLAST + # protein database searches with composition-based statistics + # and other refinements", Nucleic Acids Res. 29:2994-3005. 
+ # + # RID: 1166022616-19998-65316425856.BLASTQ1 + # + # + # Database: All non-redundant GenBank CDS + # translations+PDB+SwissProt+PIR+PRF excluding environmental samples + # 4,254,166 sequences; 1,462,033,012 total letters + # Query= gi:16127998 + # Length=428 + # + + consumer.start_header() + + read_and_call(uhandle, consumer.version, contains="BLAST") + read_and_call_while(uhandle, consumer.noevent, blank=1) + + # There might be a
<pre> line, for qblast output.
+        attempt_read_and_call(uhandle, consumer.noevent, start="<pre>")
+
+        # Read the reference(s)
+        while attempt_read_and_call(uhandle, consumer.reference, start="Reference"):
+            # References are normally multiline, terminated by a blank line
+            # (or, based on the old code, the RID line)
+            while True:
+                line = uhandle.readline()
+                if is_blank_line(line):
+                    consumer.noevent(line)
+                    break
+                elif line.startswith("RID"):
+                    break
+                else:
+                    # More of the reference
+                    consumer.reference(line)
+
+        # Deal with the optional RID: ...
+        read_and_call_while(uhandle, consumer.noevent, blank=1)
+        attempt_read_and_call(uhandle, consumer.reference, start="RID:")
+        read_and_call_while(uhandle, consumer.noevent, blank=1)
+
+        # blastpgp may have a reference for compositional score matrix
+        # adjustment (see Bug 2502):
+        if attempt_read_and_call(uhandle, consumer.reference, start="Reference"):
+            read_and_call_until(uhandle, consumer.reference, blank=1)
+            read_and_call_while(uhandle, consumer.noevent, blank=1)
+
+        # blastpgp has a Reference for composition-based statistics.
+        if attempt_read_and_call(uhandle, consumer.reference, start="Reference"):
+            read_and_call_until(uhandle, consumer.reference, blank=1)
+            read_and_call_while(uhandle, consumer.noevent, blank=1)
+
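+        # Having consumed the reference(s) and optional RID, the next
+        # non-blank line must introduce either the query ("Query=", old
+        # style) or the database ("Database:", new style).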
+        line = uhandle.peekline()
+        assert line.strip() != ""
+        assert not line.startswith("RID:")
+        if line.startswith("Query="):
+            # This is an old style query then database...
+
+            # Read the Query lines and the following blank line.
+            read_and_call(uhandle, consumer.query_info, start="Query=")
+            read_and_call_until(uhandle, consumer.query_info, blank=1)
+            read_and_call_while(uhandle, consumer.noevent, blank=1)
+
+            # Read the database lines and the following blank line.
+            read_and_call_until(uhandle, consumer.database_info, end="total letters")
+            read_and_call(uhandle, consumer.database_info, contains="sequences")
+            read_and_call_while(uhandle, consumer.noevent, blank=1)
+        elif line.startswith("Database:"):
+            # This is a new style database then query...
+            read_and_call_until(uhandle, consumer.database_info, end="total letters")
+            read_and_call(uhandle, consumer.database_info, contains="sequences")
+            read_and_call_while(uhandle, consumer.noevent, blank=1)
+
+            # Read the Query lines and the following blank line.
+            # Or, on BLAST 2.2.22+ there is no blank line - need to spot
+            # the "... Score     E" line instead.
+            read_and_call(uhandle, consumer.query_info, start="Query=")
+            # BLAST 2.2.25+ has a blank line before Length=
+            read_and_call_until(uhandle, consumer.query_info, start="Length=")
+            while True:
+                line = uhandle.peekline()
+                if not line.strip() or _score_e_re.search(line) is not None:
+                    break
+                # It is more of the query (and its length)
+                read_and_call(uhandle, consumer.query_info)
+            read_and_call_while(uhandle, consumer.noevent, blank=1)
+        else:
+            raise ValueError("Invalid header?")
+
+        consumer.end_header()
+
+    def _scan_rounds(self, uhandle, consumer):
+        # Scan a bunch of rounds.
+        # Each round begins with either a "Searching......" line
+        # or a 'Score     E' line followed by descriptions and alignments.
+        # The email server doesn't give the "Searching....." line.
+        # If there is no 'Searching.....' line then you'll first see a
+        # 'Results from round' line
+
+        while not self._eof(uhandle):
+            line = safe_peekline(uhandle)
+            if (
+                not line.startswith("Searching")
+                and not line.startswith("Results from round")
+                and _score_e_re.search(line) is None
+                and "No hits found" not in line
+            ):
+                break
+            self._scan_descriptions(uhandle, consumer)
+            self._scan_alignments(uhandle, consumer)
+
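+    # A minimal sketch of how this scanner is driven (illustrative only; the
+    # file name is hypothetical, and AbstractConsumer comes from the import
+    # at the top of this module)::
+    #
+    #     class EventLogger(AbstractConsumer):
+    #         def _unhandled(self, data):
+    #             print(data.rstrip())  # every scanned line lands here
+    #
+    #     with open("report.txt") as handle:
+    #         _Scanner().feed(handle, EventLogger())
+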
+    def _scan_descriptions(self, uhandle, consumer):
+        # Searching..................................................done
+        # Results from round 2
+        #
+        #
+        #                                                                    Sc
+        # Sequences producing significant alignments:                        (b
+        # Sequences used in model and found again:
+        #
+        # d1tde_2 3.4.1.4.4 (119-244) Thioredoxin reductase [Escherichia ...
+        # d1tcob_ 1.31.1.5.16 Calcineurin regulatory subunit (B-chain) [B...
+        # d1symb_ 1.31.1.2.2 Calcyclin (S100) [RAT (RATTUS NORVEGICUS)]
+        #
+        # Sequences not found previously or not previously below threshold:
+        #
+        # d1osa__ 1.31.1.5.11 Calmodulin [Paramecium tetraurelia]
+        # d1aoza3 2.5.1.3.3 (339-552) Ascorbate oxidase [zucchini (Cucurb...
+        #
+
+        # If PSI-BLAST, may also have:
+        #
+        # CONVERGED!
+
+        consumer.start_descriptions()
+
+        # Read 'Searching'
+        # This line seems to be missing in BLASTN 2.1.2 (others?)
+        attempt_read_and_call(uhandle, consumer.noevent, start="Searching")
+
+        # blastpgp 2.0.10 from NCBI 9/19/99 for Solaris sometimes crashes here.
+        # If this happens, the handle will yield no more information.
+        if not uhandle.peekline():
+            raise ValueError(
+                "Unexpected end of blast report. Looks suspiciously like a PSI-BLAST crash."
+            )
+
+        # BLASTN 2.2.3 sometimes spews a bunch of warnings and errors here:
+        # Searching[blastall] WARNING:  [000.000]  AT1G08320: SetUpBlastSearch
+        # [blastall] ERROR:  [000.000]  AT1G08320: Blast:
+        # [blastall] ERROR:  [000.000]  AT1G08320: Blast: Query must be at leas
+        # done
+        # Reported by David Weisman.
+        # Check for these error lines and ignore them for now.  Let
+        # the BlastErrorParser deal with them.
+        line = uhandle.peekline()
+        if "ERROR:" in line or line.startswith("done"):
+            read_and_call_while(uhandle, consumer.noevent, contains="ERROR:")
+            read_and_call(uhandle, consumer.noevent, start="done")
+
+        # Check to see if this is PSI-BLAST.
+        # If it is, the 'Searching' line will be followed by:
+        # (version 2.0.10)
+        #     Searching.............................
+        #     Results from round 2
+        # or (version 2.0.11)
+        #     Searching.............................
+        #
+        #
+        #     Results from round 2
+
+        # Skip a bunch of blank lines.
+        read_and_call_while(uhandle, consumer.noevent, blank=1)
+        # Check for the results line if it's there.
+        if attempt_read_and_call(uhandle, consumer.round, start="Results"):
+            read_and_call_while(uhandle, consumer.noevent, blank=1)
+
+        # Three things can happen here:
+        # 1.  line contains 'Score     E'
+        # 2.  line contains "No hits found"
+        # 3.  no descriptions
+        # The first one begins a bunch of descriptions.  The last two
+        # indicate that no descriptions follow, and we should go straight
+        # to the alignments.
+        if not attempt_read_and_call(
+            uhandle, consumer.description_header, has_re=_score_e_re
+        ):
+            # Either case 2 or 3.  Look for "No hits found".
+            attempt_read_and_call(uhandle, consumer.no_hits, contains="No hits found")
+            try:
+                read_and_call_while(uhandle, consumer.noevent, blank=1)
+            except ValueError as err:
+                if str(err) != "Unexpected end of stream.":
+                    raise
+
+            consumer.end_descriptions()
+            # Stop processing.
+            return
+
+        # Read the score header lines
+        read_and_call(uhandle, consumer.description_header, start="Sequences producing")
+
+        # If PSI-BLAST, read the 'Sequences used in model' line.
+        attempt_read_and_call(
+            uhandle, consumer.model_sequences, start="Sequences used in model"
+        )
+        read_and_call_while(uhandle, consumer.noevent, blank=1)
+
+        # In BLAT, rather than a "No hits found" line, we just
+        # get no descriptions (and no alignments). This can be
+        # spotted because the next line is the database block:
+        if safe_peekline(uhandle).startswith("  Database:"):
+            consumer.end_descriptions()
+            # Stop processing.
+            return
+
+        # Read the descriptions and the following blank lines, making
+        # sure that there are descriptions.
+        if not uhandle.peekline().startswith("Sequences not found"):
+            read_and_call_until(uhandle, consumer.description, blank=1)
+            read_and_call_while(uhandle, consumer.noevent, blank=1)
+
+        # If PSI-BLAST, read the 'Sequences not found' line followed
+        # by more descriptions.  However, I need to watch out for the
+        # case where there were no sequences not found previously, in
+        # which case there will be no more descriptions.
+        if attempt_read_and_call(
+            uhandle, consumer.nonmodel_sequences, start="Sequences not found"
+        ):
+            # Read the descriptions and the following blank lines.
+            read_and_call_while(uhandle, consumer.noevent, blank=1)
+            line = safe_peekline(uhandle)
+            # Brad -- added check for QUERY. On some PSI-BLAST outputs
+            # there will be a 'Sequences not found' line followed by no
+            # descriptions. Check for this case since the first thing you'll
+            # get is a blank line and then 'QUERY'
+            if (
+                not line.startswith("CONVERGED")
+                and line[0] != ">"
+                and not line.startswith("QUERY")
+            ):
+                read_and_call_until(uhandle, consumer.description, blank=1)
+                read_and_call_while(uhandle, consumer.noevent, blank=1)
+
+        attempt_read_and_call(uhandle, consumer.converged, start="CONVERGED")
+        read_and_call_while(uhandle, consumer.noevent, blank=1)
+
+        consumer.end_descriptions()
+
+    def _scan_alignments(self, uhandle, consumer):
+        if self._eof(uhandle):
+            return
+
+        # qblast inserts a helpful line here.
+        attempt_read_and_call(uhandle, consumer.noevent, start="ALIGNMENTS")
+
+        # First, check to see if I'm at the database report.
+        line = safe_peekline(uhandle)
+        if not line:
+            # EOF
+            return
+        elif line.startswith("  Database") or line.startswith("Lambda"):
+            return
+        elif line[0] == ">":
+            # XXX make a better check here between pairwise and masterslave
+            self._scan_pairwise_alignments(uhandle, consumer)
+        elif line.startswith("Effective"):
+            return
+        else:
+            # XXX put in a check to make sure I'm in a masterslave alignment
+            self._scan_masterslave_alignment(uhandle, consumer)
+
+    def _scan_pairwise_alignments(self, uhandle, consumer):
+        while not self._eof(uhandle):
+            line = safe_peekline(uhandle)
+            if line[0] != ">":
+                break
+            self._scan_one_pairwise_alignment(uhandle, consumer)
+
+    def _scan_one_pairwise_alignment(self, uhandle, consumer):
+        if self._eof(uhandle):
+            return
+        consumer.start_alignment()
+
+        self._scan_alignment_header(uhandle, consumer)
+
+        # Scan a bunch of score/alignment pairs.
+        while True:
+            if self._eof(uhandle):
+                # Shouldn't have issued that _scan_alignment_header event...
+                break
+            line = safe_peekline(uhandle)
+            if not line.startswith(" Score"):
+                break
+            self._scan_hsp(uhandle, consumer)
+        consumer.end_alignment()
+
+    def _scan_alignment_header(self, uhandle, consumer):
+        # >d1rip__ 2.24.7.1.1 Ribosomal S17 protein [Bacillus
+        #           stearothermophilus]
+        #           Length = 81
+        #
+        # Or, more recently with different white space:
+        #
+        # >gi|15799684|ref|NP_285696.1| threonine synthase ...
+        #  gi|15829258|ref|NP_308031.1| threonine synthase
+        #  ...
+        # Length=428
+        read_and_call(uhandle, consumer.title, start=">")
+        while True:
+            line = safe_readline(uhandle)
+            if line.lstrip().startswith(("Length =", "Length=")):
+                consumer.length(line)
+                break
+            elif is_blank_line(line):
+                # Check to make sure I haven't missed the Length line
+                raise ValueError("I missed the Length in an alignment header")
+            consumer.title(line)
+
+        # Older versions of BLAST will have a line with some spaces.
+        # Version 2.0.14 (maybe 2.0.13?) and above print a true blank line.
+        if not attempt_read_and_call(uhandle, consumer.noevent, start="          "):
+            read_and_call(uhandle, consumer.noevent, blank=1)
+
+    def _scan_hsp(self, uhandle, consumer):
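+        # Each HSP is delivered to the consumer as a start_hsp ... end_hsp
+        # event pair, wrapping the header (score, identities, strand, frame)
+        # and alignment (query, align, sbjct) events emitted below.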
+        consumer.start_hsp()
+        self._scan_hsp_header(uhandle, consumer)
+        self._scan_hsp_alignment(uhandle, consumer)
+        consumer.end_hsp()
+
+    def _scan_hsp_header(self, uhandle, consumer):
+        #  Score = 22.7 bits (47), Expect = 2.5
+        #  Identities = 10/36 (27%), Positives = 18/36 (49%)
+        #  Strand = Plus / Plus
+        #  Frame = +3
+        #
+
+        read_and_call(uhandle, consumer.score, start=" Score")
+        read_and_call(uhandle, consumer.identities, start=" Identities")
+        # BLASTN
+        attempt_read_and_call(uhandle, consumer.strand, start=" Strand")
+        # BLASTX, TBLASTN, TBLASTX
+        attempt_read_and_call(uhandle, consumer.frame, start=" Frame")
+        read_and_call(uhandle, consumer.noevent, blank=1)
+
+    def _scan_hsp_alignment(self, uhandle, consumer):
+        # Query: 11 GRGVSACA-------TCDGFFYRNQKVAVIGGGNTAVEEALYLSNIASEVHLIHRRDGF
+        #           GRGVS+         TC    Y  + + V GGG+ + EE   L     +   I R+
+        # Sbjct: 12 GRGVSSVVRRCIHKPTCKE--YAVKIIDVTGGGSFSAEEVQELREATLKEVDILRKVSG
+        #
+        # Query: 64 AEKILIKR 71
+        #              I +K
+        # Sbjct: 70 PNIIQLKD 77
+        #
+
+        while True:
+            # Blastn adds an extra line filled with spaces before Query
+            attempt_read_and_call(uhandle, consumer.noevent, start="     ")
+            read_and_call(uhandle, consumer.query, start="Query")
+            read_and_call(uhandle, consumer.align, start="     ")
+            read_and_call(uhandle, consumer.sbjct, start="Sbjct")
+            try:
+                read_and_call_while(uhandle, consumer.noevent, blank=1)
+            except ValueError as err:
+                if str(err) != "Unexpected end of stream.":
+                    raise
+                # End of File (well, it looks like it with recent versions
+                # of BLAST for multiple queries after the Iterator class
+                # has broken up the whole file into chunks).
+                break
+            line = safe_peekline(uhandle)
+            # Alignment continues if I see a 'Query' or the spaces for Blastn.
+            if not (line.startswith("Query") or line.startswith("     ")):
+                break
+
+    def _scan_masterslave_alignment(self, uhandle, consumer):
+        consumer.start_alignment()
+        while True:
+            line = safe_readline(uhandle)
+            # Check to see whether I'm finished reading the alignment.
+            # This is indicated by 1) database section, 2) next psi-blast
+            # round, which can also be a 'Results from round' if no
+            # searching line is present
+            # patch by chapmanb
+            if line.startswith("Searching") or line.startswith("Results from round"):
+                uhandle.saveline(line)
+                break
+            elif line.startswith("  Database"):
+                uhandle.saveline(line)
+                break
+            elif is_blank_line(line):
+                consumer.noevent(line)
+            else:
+                consumer.multalign(line)
+        read_and_call_while(uhandle, consumer.noevent, blank=1)
+        consumer.end_alignment()
+
+    def _eof(self, uhandle):
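+        # Return True when the handle is exhausted: peeking either raises
+        # "Unexpected end of stream." or returns an empty string.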
+        try:
+            line = safe_peekline(uhandle)
+        except ValueError as err:
+            if str(err) != "Unexpected end of stream.":
+                raise
+            line = ""
+        return not line
+
+    def _scan_database_report(self, uhandle, consumer):
+        #   Database: sdqib40-1.35.seg.fa
+        #     Posted date:  Nov 1, 1999  4:25 PM
+        #   Number of letters in database: 223,339
+        #   Number of sequences in database:  1323
+        #
+        # Lambda     K      H
+        #    0.322    0.133    0.369
+        #
+        # Gapped
+        # Lambda     K      H
+        #    0.270   0.0470    0.230
+        #
+        ##########################################
+        # Or, more recently Blast 2.2.15 gives less blank lines
+        ##########################################
+        #   Database: All non-redundant GenBank CDS translations+PDB+SwissProt+PIR+PRF excluding
+        # environmental samples
+        #     Posted date:  Dec 12, 2006  5:51 PM
+        #   Number of letters in database: 667,088,753
+        #   Number of sequences in database:  2,094,974
+        # Lambda     K      H
+        #    0.319    0.136    0.395
+        # Gapped
+        # Lambda     K      H
+        #    0.267   0.0410    0.140
+
+        if self._eof(uhandle):
+            return
+
+        consumer.start_database_report()
+
+        # Subset of the database(s) listed below
+        #    Number of letters searched: 562,618,960
+        #    Number of sequences searched:  228,924
+        if attempt_read_and_call(uhandle, consumer.noevent, start="  Subset"):
+            read_and_call(uhandle, consumer.noevent, contains="letters")
+            read_and_call(uhandle, consumer.noevent, contains="sequences")
+            read_and_call(uhandle, consumer.noevent, start="  ")
+
+        # Sameet Mehta reported seeing output from BLASTN 2.2.9 that
+        # was missing the "Database" stanza completely.
+        while attempt_read_and_call(uhandle, consumer.database, start="  Database"):
+            # BLAT output ends abruptly here, without any of the other
+            # information.  Check to see if this is the case.  If so,
+            # then end the database report here gracefully.
+            if not uhandle.peekline().strip() or uhandle.peekline().startswith("BLAST"):
+                consumer.end_database_report()
+                return
+
+            # Database can span multiple lines.
+            read_and_call_until(uhandle, consumer.database, start="    Posted")
+            read_and_call(uhandle, consumer.posted_date, start="    Posted")
+            read_and_call(
+                uhandle, consumer.num_letters_in_database, start="  Number of letters"
+            )
+            read_and_call(
+                uhandle,
+                consumer.num_sequences_in_database,
+                start="  Number of sequences",
+            )
+            # There may not be a line starting with spaces...
+            attempt_read_and_call(uhandle, consumer.noevent, start="  ")
+
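+            # Peek at the next line (read it, then push it back) to see
+            # whether we have reached the Karlin-Altschul block ("Lambda"),
+            # which ends the per-database stanzas.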
+            line = safe_readline(uhandle)
+            uhandle.saveline(line)
+            if "Lambda" in line:
+                break
+
+        try:
+            read_and_call(uhandle, consumer.noevent, start="Lambda")
+            read_and_call(uhandle, consumer.ka_params)
+        except Exception:  # TODO: ValueError, AttributeError?
+            pass
+
+        # This blank line is optional:
+        attempt_read_and_call(uhandle, consumer.noevent, blank=1)
+
+        # not BLASTP
+        attempt_read_and_call(uhandle, consumer.gapped, start="Gapped")
+        # not TBLASTX
+        if attempt_read_and_call(uhandle, consumer.noevent, start="Lambda"):
+            read_and_call(uhandle, consumer.ka_params_gap)
+
+        # Blast 2.2.4 can sometimes skip the whole parameter section.
+        # Thus, I need to be careful not to read past the end of the
+        # file.
+        try:
+            read_and_call_while(uhandle, consumer.noevent, blank=1)
+        except ValueError as x:
+            if str(x) != "Unexpected end of stream.":
+                raise
+        consumer.end_database_report()
+
+    def _scan_parameters(self, uhandle, consumer):
+        # Matrix: BLOSUM62
+        # Gap Penalties: Existence: 11, Extension: 1
+        # Number of Hits to DB: 50604
+        # Number of Sequences: 1323
+        # Number of extensions: 1526
+        # Number of successful extensions: 6
+        # Number of sequences better than 10.0: 5
+        # Number of HSP's better than 10.0 without gapping: 5
+        # Number of HSP's successfully gapped in prelim test: 0
+        # Number of HSP's that attempted gapping in prelim test: 1
+        # Number of HSP's gapped (non-prelim): 5
+        # length of query: 140
+        # length of database: 223,339
+        # effective HSP length: 39
+        # effective length of query: 101
+        # effective length of database: 171,742
+        # effective search space: 17345942
+        # effective search space used: 17345942
+        # T: 11
+        # A: 40
+        # X1: 16 ( 7.4 bits)
+        # X2: 38 (14.8 bits)
+        # X3: 64 (24.9 bits)
+        # S1: 41 (21.9 bits)
+        # S2: 42 (20.8 bits)
+        ##########################################
+        # Or, more recently Blast(x) 2.2.15 gives
+        ##########################################
+        # Matrix: BLOSUM62
+        # Gap Penalties: Existence: 11, Extension: 1
+        # Number of Sequences: 4535438
+        # Number of Hits to DB: 2,588,844,100
+        # Number of extensions: 60427286
+        # Number of successful extensions: 126433
+        # Number of sequences better than  2.0: 30
+        # Number of HSP's gapped: 126387
+        # Number of HSP's successfully gapped: 35
+        # Length of query: 291
+        # Length of database: 1,573,298,872
+        # Length adjustment: 130
+        # Effective length of query: 161
+        # Effective length of database: 983,691,932
+        # Effective search space: 158374401052
+        # Effective search space used: 158374401052
+        # Neighboring words threshold: 12
+        # Window for multiple hits: 40
+        # X1: 16 ( 7.3 bits)
+        # X2: 38 (14.6 bits)
+        # X3: 64 (24.7 bits)
+        # S1: 41 (21.7 bits)
+        # S2: 32 (16.9 bits)
+
+        # Blast 2.2.4 can sometimes skip the whole parameter section.
+        # BLAT also skips the whole parameter section.
+        # Thus, check to make sure that the parameter section really
+        # exists.
+        if not uhandle.peekline().strip():
+            return
+
+        # BLASTN 2.2.9 looks like it reverses the "Number of Hits" and
+        # "Number of Sequences" lines.
+        consumer.start_parameters()
+
+        # Matrix line may be missing in BLASTN 2.2.9
+        attempt_read_and_call(uhandle, consumer.matrix, start="Matrix")
+        # not TBLASTX
+        attempt_read_and_call(uhandle, consumer.gap_penalties, start="Gap")
+        attempt_read_and_call(
+            uhandle, consumer.num_sequences, start="Number of Sequences"
+        )
+        attempt_read_and_call(uhandle, consumer.num_hits, start="Number of Hits")
+        attempt_read_and_call(
+            uhandle, consumer.num_sequences, start="Number of Sequences"
+        )
+        attempt_read_and_call(
+            uhandle, consumer.num_extends, start="Number of extensions"
+        )
+        attempt_read_and_call(
+            uhandle, consumer.num_good_extends, start="Number of successful"
+        )
+        attempt_read_and_call(
+            uhandle, consumer.num_seqs_better_e, start="Number of sequences"
+        )
+
+        # not BLASTN, TBLASTX
+        if attempt_read_and_call(
+            uhandle, consumer.hsps_no_gap, start="Number of HSP's better"
+        ):
+            # BLASTN 2.2.9
+            if attempt_read_and_call(
+                uhandle, consumer.noevent, start="Number of HSP's gapped:"
+            ):
+                read_and_call(
+                    uhandle, consumer.noevent, start="Number of HSP's successfully"
+                )
+                # This is omitted in 2.2.15
+                attempt_read_and_call(
+                    uhandle, consumer.noevent, start="Number of extra gapped extensions"
+                )
+            else:
+                read_and_call(
+                    uhandle,
+                    consumer.hsps_prelim_gapped,
+                    start="Number of HSP's successfully",
+                )
+                read_and_call(
+                    uhandle,
+                    consumer.hsps_prelim_gapped_attempted,
+                    start="Number of HSP's that",
+                )
+                read_and_call(
+                    uhandle, consumer.hsps_gapped, start="Number of HSP's gapped"
+                )
+        # e.g. BLASTX 2.2.15 where the "better" line is missing
+        elif attempt_read_and_call(
+            uhandle, consumer.noevent, start="Number of HSP's gapped"
+        ):
+            read_and_call(
+                uhandle, consumer.noevent, start="Number of HSP's successfully"
+            )
+
+        # not in blastx 2.2.1
+        attempt_read_and_call(
+            uhandle, consumer.query_length, has_re=re.compile(r"[Ll]ength of query")
+        )
+        # Not in BLASTX 2.2.22+
+        attempt_read_and_call(
+            uhandle,
+            consumer.database_length,
+            has_re=re.compile(r"[Ll]ength of \s*[Dd]atabase"),
+        )
+
+        # BLASTN 2.2.9
+        attempt_read_and_call(uhandle, consumer.noevent, start="Length adjustment")
+        attempt_read_and_call(
+            uhandle, consumer.effective_hsp_length, start="effective HSP"
+        )
+        # Not in blastx 2.2.1
+        attempt_read_and_call(
+            uhandle,
+            consumer.effective_query_length,
+            has_re=re.compile(r"[Ee]ffective length of query"),
+        )
+
+        # This is not in BLASTP 2.2.15
+        attempt_read_and_call(
+            uhandle,
+            consumer.effective_database_length,
+            has_re=re.compile(r"[Ee]ffective length of \s*[Dd]atabase"),
+        )
+        # Not in blastx 2.2.1, added a ':' to distinguish between
+        # this and the 'effective search space used' line
+        attempt_read_and_call(
+            uhandle,
+            consumer.effective_search_space,
+            has_re=re.compile(r"[Ee]ffective search space:"),
+        )
+        # Does not appear in BLASTP 2.0.5
+        attempt_read_and_call(
+            uhandle,
+            consumer.effective_search_space_used,
+            has_re=re.compile(r"[Ee]ffective search space used"),
+        )
+
+        # BLASTX, TBLASTN, TBLASTX
+        attempt_read_and_call(uhandle, consumer.frameshift, start="frameshift")
+
+        # not in BLASTN 2.2.9
+        attempt_read_and_call(uhandle, consumer.threshold, start="T")
+        # In BLASTX 2.2.15 replaced by: "Neighboring words threshold: 12"
+        attempt_read_and_call(
+            uhandle, consumer.threshold, start="Neighboring words threshold"
+        )
+
+        # not in BLASTX 2.2.15
+        attempt_read_and_call(uhandle, consumer.window_size, start="A")
+        # get this instead: "Window for multiple hits: 40"
+        attempt_read_and_call(
+            uhandle, consumer.window_size, start="Window for multiple hits"
+        )
+
+        # not in BLASTX 2.2.22+
+        attempt_read_and_call(uhandle, consumer.dropoff_1st_pass, start="X1")
+        # not TBLASTN
+        attempt_read_and_call(uhandle, consumer.gap_x_dropoff, start="X2")
+
+        # not BLASTN, TBLASTX
+        attempt_read_and_call(uhandle, consumer.gap_x_dropoff_final, start="X3")
+
+        # not TBLASTN
+        attempt_read_and_call(uhandle, consumer.gap_trigger, start="S1")
+        # not in blastx 2.2.1
+        # first we make sure we have additional lines to work with, if
+        # not then the file is done and we don't have a final S2
+        if not is_blank_line(uhandle.peekline(), allow_spaces=1):
+            read_and_call(uhandle, consumer.blast_cutoff, start="S2")
+
+        consumer.end_parameters()
+
+
+class BlastParser(AbstractParser):
+    """Parses BLAST data into a Record.Blast object."""
+
+    def __init__(self):
+        """Initialize the class."""
+        self._scanner = _Scanner()
+        self._consumer = _BlastConsumer()
+
+    def parse(self, handle):
+        """Parse BLAST handle into a Record.Blast object."""
+        self._scanner.feed(handle, self._consumer)
+        return self._consumer.data
+
+
+class PSIBlastParser(AbstractParser):
+    """Parses BLAST data into a Record.PSIBlast object."""
+
+    def __init__(self):
+        """Initialize the class."""
+        self._scanner = _Scanner()
+        self._consumer = _PSIBlastConsumer()
+
+    def parse(self, handle):
+        """Parse BLAST handle into a Record.PSIBlast object."""
+        self._scanner.feed(handle, self._consumer)
+        return self._consumer.data
+
+
+class _HeaderConsumer:
+    def start_header(self):
+        self._header = Record.Header()
+
+    def version(self, line):
+        c = line.split()
+        self._header.application = c[0]
+        self._header.version = c[1]
+        if len(c) > 2:
+            # The date is missing in the new C++ output from blastx 2.2.22+
+            # Just get "BLASTX 2.2.22+\n" and that's all.
+            self._header.date = c[2][1:-1]
+
+    def reference(self, line):
+        if line.startswith("Reference: "):
+            self._header.reference = line[11:]
+        else:
+            self._header.reference += line
+
+    def query_info(self, line):
+        if line.startswith("Query= "):
+            self._header.query = line[7:].lstrip()
+        elif line.startswith("Length="):
+            # New style way to give the query length in BLAST 2.2.22+ (the C++ code)
+            self._header.query_letters = _safe_int(line[7:].strip())
+        elif not line.startswith("       "):  # continuation of query_info
+            self._header.query = "%s%s" % (self._header.query, line)
+        else:
+            # Hope it is the old style way to give the query length:
+            (letters,) = _re_search(
+                r"([0-9,]+) letters",
+                line,
+                "I could not find the number of letters in line\n%s" % line,
+            )
+            self._header.query_letters = _safe_int(letters)
+
+    def database_info(self, line):
+        line = line.rstrip()
+        if line.startswith("Database: "):
+            self._header.database = line[10:]
+        elif not line.endswith("total letters"):
+            if self._header.database:
+                # Need to include a space when merging a multi-line database description
+                self._header.database += " " + line.strip()
+            else:
+                self._header.database = line.strip()
+        else:
+            sequences, letters = _re_search(
+                r"([0-9,]+) sequences; ([0-9,-]+) total letters",
+                line,
+                "I could not find the sequences and letters in line\n%s" % line,
+            )
+            self._header.database_sequences = _safe_int(sequences)
+            self._header.database_letters = _safe_int(letters)
+
+    def end_header(self):
+        # Get rid of the trailing newlines
+        self._header.reference = self._header.reference.rstrip()
+        self._header.query = self._header.query.rstrip()
+
+
+class _DescriptionConsumer:
+    def start_descriptions(self):
+        self._descriptions = []
+        self._model_sequences = []
+        self._nonmodel_sequences = []
+        self._converged = 0
+        self._type = None
+        self._roundnum = None
+
+        self.__has_n = 0  # Does the description line contain an N value?
+
+    def description_header(self, line):
+        if line.startswith("Sequences producing"):
+            cols = line.split()
+            if cols[-1] == "N":
+                self.__has_n = 1
+
+    def description(self, line):
+        dh = self._parse(line)
+        if self._type == "model":
+            self._model_sequences.append(dh)
+        elif self._type == "nonmodel":
+            self._nonmodel_sequences.append(dh)
+        else:
+            self._descriptions.append(dh)
+
+    def model_sequences(self, line):
+        self._type = "model"
+
+    def nonmodel_sequences(self, line):
+        self._type = "nonmodel"
+
+    def converged(self, line):
+        self._converged = 1
+
+    def no_hits(self, line):
+        pass
+
+    def round(self, line):
+        if not line.startswith("Results from round"):
+            raise ValueError("I didn't understand the round line\n%s" % line)
+        self._roundnum = _safe_int(line[18:].strip())
+
+    def end_descriptions(self):
+        pass
+
+    def _parse(self, description_line):
+        line = description_line  # for convenience
+        dh = Record.Description()
+
+        # I need to separate the score and p-value from the title.
+        # sp|P21297|FLBT_CAUCR FLBT PROTEIN     [snip]         284  7e-77
+        # sp|P21297|FLBT_CAUCR FLBT PROTEIN     [snip]         284  7e-77  1
+        # special cases to handle:
+        #   - title must be preserved exactly (including whitespaces)
+        #   - score could be equal to e-value (not likely, but what if??)
+        #   - sometimes there's an "N" score of '1'.
+        cols = line.split()
+        if len(cols) < 3:
+            raise ValueError("Line does not appear to contain description:\n%s" % line)
+        if self.__has_n:
+            i = line.rfind(cols[-1])  # find start of N
+            i = line.rfind(cols[-2], 0, i)  # find start of p-value
+            i = line.rfind(cols[-3], 0, i)  # find start of score
+        else:
+            i = line.rfind(cols[-1])  # find start of p-value
+            i = line.rfind(cols[-2], 0, i)  # find start of score
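+        # e.g. for "sp|P21297|FLBT_CAUCR FLBT PROTEIN     284  7e-77"
+        # (no N column), the second rfind leaves i at the start of "284",
+        # so line[:i] below recovers the title, trailing spaces included
+        # (they are stripped when the fields are assigned).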
+        if self.__has_n:
+            dh.title, dh.score, dh.e, dh.num_alignments = (
+                line[:i].rstrip(),
+                cols[-3],
+                cols[-2],
+                cols[-1],
+            )
+        else:
+            dh.title, dh.score, dh.e, dh.num_alignments = (
+                line[:i].rstrip(),
+                cols[-2],
+                cols[-1],
+                1,
+            )
+        dh.num_alignments = _safe_int(dh.num_alignments)
+        dh.score = _safe_int(dh.score)
+        dh.e = _safe_float(dh.e)
+        return dh
+
+
+class _AlignmentConsumer:
+    # This is a little bit tricky.  An alignment can either be a
+    # pairwise alignment or a multiple alignment.  Since it's difficult
+    # to know a priori which one the blast record will contain, I'm going
+    # to make one class that can parse both of them.
+    def start_alignment(self):
+        self._alignment = Record.Alignment()
+        self._multiple_alignment = Record.MultipleAlignment()
+
+    def title(self, line):
+        if self._alignment.title:
+            self._alignment.title += " "
+        self._alignment.title += line.strip()
+
+    def length(self, line):
+        # e.g. "Length = 81" or more recently, "Length=428"
+        parts = line.replace(" ", "").split("=")
+        if len(parts) != 2:
+            raise ValueError("Unrecognised format length line: %r" % line)
+        self._alignment.length = parts[1]
+        self._alignment.length = _safe_int(self._alignment.length)
+
+    def multalign(self, line):
+        # Standalone version uses 'QUERY', while WWW version uses blast_tmp.
+        if line.startswith("QUERY") or line.startswith("blast_tmp"):
+            # If this is the first line of the multiple alignment,
+            # then I need to figure out how the line is formatted.
+
+            # Format of line is:
+            # QUERY 1   acttg...gccagaggtggtttattcagtctccataagagaggggacaaacg 60
+            try:
+                name, start, seq, end = line.split()
+            except ValueError:
+                raise ValueError("I do not understand the line\n%s" % line) from None
+            self._start_index = line.index(start, len(name))
+            self._seq_index = line.index(seq, self._start_index + len(start))
+            # subtract 1 for the space
+            self._name_length = self._start_index - 1
+            self._start_length = self._seq_index - self._start_index - 1
+            self._seq_length = line.rfind(end) - self._seq_index - 1
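+            # e.g. for "QUERY 1   acttg...aacg 60": name is "QUERY", so
+            # _name_length is 5 and _start_index is 6; these fixed column
+            # offsets are reused to slice every later line of the block.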
+
+            # self._seq_index = line.index(seq)
+            # # subtract 1 for the space
+            # self._seq_length = line.rfind(end) - self._seq_index - 1
+            # self._start_index = line.index(start)
+            # self._start_length = self._seq_index - self._start_index - 1
+            # self._name_length = self._start_index
+
+        # Extract the information from the line
+        name = line[: self._name_length]
+        name = name.rstrip()
+        start = line[self._start_index : self._start_index + self._start_length]
+        start = start.rstrip()
+        if start:
+            start = _safe_int(start)
+        end = line[self._seq_index + self._seq_length :].rstrip()
+        if end:
+            end = _safe_int(end)
+        seq = line[self._seq_index : self._seq_index + self._seq_length].rstrip()
+        # right pad the sequence with spaces if necessary
+        if len(seq) < self._seq_length:
+            seq += " " * (self._seq_length - len(seq))
+
+        # I need to make sure the sequence is aligned correctly with the query.
+        # First, I will find the length of the query.  Then, if necessary,
+        # I will pad my current sequence with spaces so that they will line
+        # up correctly.
+
+        # Two possible things can happen:
+        # QUERY
+        # 504
+        #
+        # QUERY
+        # 403
+        #
+        # Sequence 504 will need padding at the end.  Since I won't know
+        # this until the end of the alignment, this will be handled in
+        # end_alignment.
+        # Sequence 403 will need padding before being added to the alignment.
+
+        align = self._multiple_alignment.alignment  # for convenience
+        align.append((name, start, seq, end))
+
+        # This is old code that tried to line up all the sequences
+        # in a multiple alignment by using the sequence title's as
+        # identifiers.  The problem with this is that BLAST assigns
+        # different HSP's from the same sequence the same id.  Thus,
+        # in one alignment block, there may be multiple sequences with
+        # the same id.  I'm not sure how to handle this, so I'm not
+        # going to.
+
+        # # If the sequence is the query, then just add it.
+        # if name == 'QUERY':
+        #     if len(align) == 0:
+        #         align.append((name, start, seq))
+        #     else:
+        #         aname, astart, aseq = align[0]
+        #         if name != aname:
+        #             raise ValueError, "Query is not the first sequence"
+        #         aseq = aseq + seq
+        #         align[0] = aname, astart, aseq
+        # else:
+        #     if len(align) == 0:
+        #         raise ValueError, "I could not find the query sequence"
+        #     qname, qstart, qseq = align[0]
+        #
+        #     # Now find my sequence in the multiple alignment.
+        #     for i in range(1, len(align)):
+        #         aname, astart, aseq = align[i]
+        #         if name == aname:
+        #             index = i
+        #             break
+        #     else:
+        #         # If I couldn't find it, then add a new one.
+        #         align.append((None, None, None))
+        #         index = len(align)-1
+        #         # Make sure to left-pad it.
+        #         aname, astart, aseq = name, start, ' '*(len(qseq)-len(seq))
+        #
+        #     if len(qseq) != len(aseq) + len(seq):
+        #         # If my sequences are shorter than the query sequence,
+        #         # then I will need to pad some spaces to make them line up.
+        #         # Since I've already right padded seq, that means aseq
+        #         # must be too short.
+        #         aseq = aseq + ' '*(len(qseq)-len(aseq)-len(seq))
+        #     aseq = aseq + seq
+        #     if astart is None:
+        #         astart = start
+        #     align[index] = aname, astart, aseq
+
+    def end_alignment(self):
+        # Remove trailing newlines
+        if self._alignment:
+            self._alignment.title = self._alignment.title.rstrip()
+
+        # This code is also obsolete.  See note above.
+        # If there's a multiple alignment, I will need to make sure
+        # all the sequences are aligned.  That is, I may need to
+        # right-pad the sequences.
+        # if self._multiple_alignment is not None:
+        #     align = self._multiple_alignment.alignment
+        #     seqlen = None
+        #     for i in range(len(align)):
+        #         name, start, seq = align[i]
+        #         if seqlen is None:
+        #             seqlen = len(seq)
+        #         else:
+        #             if len(seq) < seqlen:
+        #                 seq = seq + ' '*(seqlen - len(seq))
+        #                 align[i] = name, start, seq
+        #             elif len(seq) > seqlen:
+        #                 raise ValueError, \
+        #                       "Sequence %s is longer than the query" % name
+
+        # Clean up some variables, if they exist.
+        try:
+            del self._seq_index
+            del self._seq_length
+            del self._start_index
+            del self._start_length
+            del self._name_length
+        except AttributeError:
+            pass
+
+
+class _HSPConsumer:
+    def start_hsp(self):
+        self._hsp = Record.HSP()
+
+    def score(self, line):
+        self._hsp.bits, self._hsp.score = _re_search(
+            r"Score =\s*([0-9.e+]+) bits \(([0-9]+)\)",
+            line,
+            "I could not find the score in line\n%s" % line,
+        )
+        self._hsp.score = _safe_float(self._hsp.score)
+        self._hsp.bits = _safe_float(self._hsp.bits)
+
+        x, y = _re_search(
+            r"Expect\(?(\d*)\)? = +([0-9.e\-|\+]+)",
+            line,
+            "I could not find the expect in line\n%s" % line,
+        )
+        if x:
+            self._hsp.num_alignments = _safe_int(x)
+        else:
+            self._hsp.num_alignments = 1
+        self._hsp.expect = _safe_float(y)
+
+    def identities(self, line):
+        x, y = _re_search(
+            r"Identities = (\d+)\/(\d+)",
+            line,
+            "I could not find the identities in line\n%s" % line,
+        )
+        self._hsp.identities = _safe_int(x), _safe_int(y)
+        self._hsp.align_length = _safe_int(y)
+
+        if "Positives" in line:
+            x, y = _re_search(
+                r"Positives = (\d+)\/(\d+)",
+                line,
+                "I could not find the positives in line\n%s" % line,
+            )
+            self._hsp.positives = _safe_int(x), _safe_int(y)
+            assert self._hsp.align_length == _safe_int(y)
+
+        if "Gaps" in line:
+            x, y = _re_search(
+                r"Gaps = (\d+)\/(\d+)",
+                line,
+                "I could not find the gaps in line\n%s" % line,
+            )
+            self._hsp.gaps = _safe_int(x), _safe_int(y)
+            assert self._hsp.align_length == _safe_int(y)
+
+    def strand(self, line):
+        self._hsp.strand = _re_search(
+            r"Strand\s?=\s?(\w+)\s?/\s?(\w+)",
+            line,
+            "I could not find the strand in line\n%s" % line,
+        )
+
+    def frame(self, line):
+        # Frame can be in formats:
+        # Frame = +1
+        # Frame = +2 / +2
+        if "/" in line:
+            self._hsp.frame = _re_search(
+                r"Frame\s?=\s?([-+][123])\s?/\s?([-+][123])",
+                line,
+                "I could not find the frame in line\n%s" % line,
+            )
+        else:
+            self._hsp.frame = _re_search(
+                r"Frame = ([-+][123])",
+                line,
+                "I could not find the frame in line\n%s" % line,
+            )
+
+    # Match a space, if one is available.  Masahir Ishikawa found a
+    # case where there's no space between the start and the sequence:
+    # Query: 100tt 101
+    # line below modified by Yair Benita, Sep 2004
+    # Note that the colon is not always present. 2006
+    _query_re = re.compile(r"Query(:?) \s*(\d+)\s*(.+) (\d+)")
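+    # Groups: optional colon, start coordinate, aligned sequence, end
+    # coordinate.  m.start(3) records the column where the sequence
+    # begins, so align() can slice the unlabeled match line at the
+    # same offset.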
+
+    def query(self, line):
+        m = self._query_re.search(line)
+        if m is None:
+            if (
+                line.strip()
+                == "Query        ------------------------------------------------------------"
+            ):
+                # Special case - long gap relative to the subject,
+                # note there is no start/end present, cannot update those
+                self._hsp.query += "-" * 60
+                self._query_len = 60  # number of dashes
+                self._query_start_index = 13  # offset of first dash
+                return
+            raise ValueError("I could not find the query in line\n%s" % line)
+
+        # line below modified by Yair Benita, Sep 2004.
+        # added the end attribute for the query
+        colon, start, seq, end = m.groups()
+        seq = seq.strip()
+        self._hsp.query += seq
+        if self._hsp.query_start is None:
+            self._hsp.query_start = _safe_int(start)
+
+        # line below added by Yair Benita, Sep 2004.
+        # added the end attribute for the query
+        self._hsp.query_end = _safe_int(end)
+
+        # Get index for sequence start (regular expression element 3)
+        self._query_start_index = m.start(3)
+        self._query_len = len(seq)
+
+    def align(self, line):
+        seq = line[self._query_start_index :].rstrip()
+        if len(seq) < self._query_len:
+            # Make sure the alignment is the same length as the query
+            seq += " " * (self._query_len - len(seq))
+        elif len(seq) > self._query_len:
+            raise ValueError("Match is longer than the query in line\n%s" % line)
+        self._hsp.match += seq
+
+    # To match how we do the query, cache the regular expression.
+    # Note that the colon is not always present.
+    _sbjct_re = re.compile(r"Sbjct(:?) \s*(\d+)\s*(.+) (\d+)")
+
+    def sbjct(self, line):
+        m = self._sbjct_re.search(line)
+        if m is None:
+            raise ValueError("I could not find the sbjct in line\n%s" % line)
+        colon, start, seq, end = m.groups()
+        # mikep 26/9/00
+        # On occasion, there is a blast hit with no subject match
+        # so far, it only occurs with 1-line short "matches"
+        # I have decided to let these pass as they appear
+        if not seq.strip():
+            seq = " " * self._query_len
+        else:
+            seq = seq.strip()
+        self._hsp.sbjct += seq
+        if self._hsp.sbjct_start is None:
+            self._hsp.sbjct_start = _safe_int(start)
+
+        self._hsp.sbjct_end = _safe_int(end)
+        if len(seq) != self._query_len:
+            raise ValueError(
+                "QUERY and SBJCT sequence lengths don't match (%i %r vs %i) in line\n%s"
+                % (self._query_len, self._hsp.query, len(seq), line)
+            )
+
+        del self._query_start_index  # clean up unused variables
+        del self._query_len
+
+    def end_hsp(self):
+        pass
+
+
+class _DatabaseReportConsumer:
+    def start_database_report(self):
+        self._dr = Record.DatabaseReport()
+
+    def database(self, line):
+        m = re.search(r"Database: (.+)$", line)
+        if m:
+            self._dr.database_name.append(m.group(1))
+        elif self._dr.database_name:
+            # This must be a continuation of the previous name.
+            self._dr.database_name[-1] = "%s%s" % (
+                self._dr.database_name[-1],
+                line.strip(),
+            )
+
+    def posted_date(self, line):
+        self._dr.posted_date.append(
+            _re_search(
+                r"Posted date:\s*(.+)$",
+                line,
+                "I could not find the posted date in line\n%s" % line,
+            )
+        )
+
+    def num_letters_in_database(self, line):
+        (letters,) = _get_cols(
+            line, (-1,), ncols=6, expected={2: "letters", 4: "database:"}
+        )
+        self._dr.num_letters_in_database.append(_safe_int(letters))
+
+    def num_sequences_in_database(self, line):
+        (sequences,) = _get_cols(
+            line, (-1,), ncols=6, expected={2: "sequences", 4: "database:"}
+        )
+        self._dr.num_sequences_in_database.append(_safe_int(sequences))
+
+    def ka_params(self, line):
+        self._dr.ka_params = [_safe_float(x) for x in line.split()]
+
+    def gapped(self, line):
+        self._dr.gapped = 1
+
+    def ka_params_gap(self, line):
+        self._dr.ka_params_gap = [_safe_float(x) for x in line.split()]
+
+    def end_database_report(self):
+        pass
+
+
+class _ParametersConsumer:
+    def start_parameters(self):
+        self._params = Record.Parameters()
+
+    def matrix(self, line):
+        self._params.matrix = line[8:].rstrip()
+
+    def gap_penalties(self, line):
+        self._params.gap_penalties = [
+            _safe_float(x)
+            for x in _get_cols(
+                line, (3, 5), ncols=6, expected={2: "Existence:", 4: "Extension:"}
+            )
+        ]
+
+    def num_hits(self, line):
+        if "1st pass" in line:
+            (x,) = _get_cols(line, (-4,), ncols=11, expected={2: "Hits"})
+            self._params.num_hits = _safe_int(x)
+        else:
+            (x,) = _get_cols(line, (-1,), ncols=6, expected={2: "Hits"})
+            self._params.num_hits = _safe_int(x)
+
+    def num_sequences(self, line):
+        if "1st pass" in line:
+            (x,) = _get_cols(line, (-4,), ncols=9, expected={2: "Sequences:"})
+            self._params.num_sequences = _safe_int(x)
+        else:
+            (x,) = _get_cols(line, (-1,), ncols=4, expected={2: "Sequences:"})
+            self._params.num_sequences = _safe_int(x)
+
+    def num_extends(self, line):
+        if "1st pass" in line:
+            (x,) = _get_cols(line, (-4,), ncols=9, expected={2: "extensions:"})
+            self._params.num_extends = _safe_int(x)
+        else:
+            (x,) = _get_cols(line, (-1,), ncols=4, expected={2: "extensions:"})
+            self._params.num_extends = _safe_int(x)
+
+    def num_good_extends(self, line):
+        if "1st pass" in line:
+            (x,) = _get_cols(line, (-4,), ncols=10, expected={3: "extensions:"})
+            self._params.num_good_extends = _safe_int(x)
+        else:
+            (x,) = _get_cols(line, (-1,), ncols=5, expected={3: "extensions:"})
+            self._params.num_good_extends = _safe_int(x)
+
+    def num_seqs_better_e(self, line):
+        (self._params.num_seqs_better_e,) = _get_cols(
+            line, (-1,), ncols=7, expected={2: "sequences"}
+        )
+        self._params.num_seqs_better_e = _safe_int(self._params.num_seqs_better_e)
+
+    def hsps_no_gap(self, line):
+        (self._params.hsps_no_gap,) = _get_cols(
+            line, (-1,), ncols=9, expected={3: "better", 7: "gapping:"}
+        )
+        self._params.hsps_no_gap = _safe_int(self._params.hsps_no_gap)
+
+    def hsps_prelim_gapped(self, line):
+        (self._params.hsps_prelim_gapped,) = _get_cols(
+            line, (-1,), ncols=9, expected={4: "gapped", 6: "prelim"}
+        )
+        self._params.hsps_prelim_gapped = _safe_int(self._params.hsps_prelim_gapped)
+
+    def hsps_prelim_gapped_attempted(self, line):
+        (self._params.hsps_prelim_gapped_attempted,) = _get_cols(
+            line, (-1,), ncols=10, expected={4: "attempted", 7: "prelim"}
+        )
+        self._params.hsps_prelim_gapped_attempted = _safe_int(
+            self._params.hsps_prelim_gapped_attempted
+        )
+
+    def hsps_gapped(self, line):
+        (self._params.hsps_gapped,) = _get_cols(
+            line, (-1,), ncols=6, expected={3: "gapped"}
+        )
+        self._params.hsps_gapped = _safe_int(self._params.hsps_gapped)
+
+    def query_length(self, line):
+        (self._params.query_length,) = _get_cols(
+            line.lower(), (-1,), ncols=4, expected={0: "length", 2: "query:"}
+        )
+        self._params.query_length = _safe_int(self._params.query_length)
+
+    def database_length(self, line):
+        (self._params.database_length,) = _get_cols(
+            line.lower(), (-1,), ncols=4, expected={0: "length", 2: "database:"}
+        )
+        self._params.database_length = _safe_int(self._params.database_length)
+
+    def effective_hsp_length(self, line):
+        (self._params.effective_hsp_length,) = _get_cols(
+            line, (-1,), ncols=4, expected={1: "HSP", 2: "length:"}
+        )
+        self._params.effective_hsp_length = _safe_int(self._params.effective_hsp_length)
+
+    def effective_query_length(self, line):
+        (self._params.effective_query_length,) = _get_cols(
+            line, (-1,), ncols=5, expected={1: "length", 3: "query:"}
+        )
+        self._params.effective_query_length = _safe_int(
+            self._params.effective_query_length
+        )
+
+    def effective_database_length(self, line):
+        (self._params.effective_database_length,) = _get_cols(
+            line.lower(), (-1,), ncols=5, expected={1: "length", 3: "database:"}
+        )
+        self._params.effective_database_length = _safe_int(
+            self._params.effective_database_length
+        )
+
+    def effective_search_space(self, line):
+        (self._params.effective_search_space,) = _get_cols(
+            line, (-1,), ncols=4, expected={1: "search"}
+        )
+        self._params.effective_search_space = _safe_int(
+            self._params.effective_search_space
+        )
+
+    def effective_search_space_used(self, line):
+        (self._params.effective_search_space_used,) = _get_cols(
+            line, (-1,), ncols=5, expected={1: "search", 3: "used:"}
+        )
+        self._params.effective_search_space_used = _safe_int(
+            self._params.effective_search_space_used
+        )
+
+    def frameshift(self, line):
+        self._params.frameshift = _get_cols(
+            line, (4, 5), ncols=6, expected={0: "frameshift", 2: "decay"}
+        )
+
+    def threshold(self, line):
+        if line[:2] == "T:":
+            # Assume its an old style line like "T: 123"
+            (self._params.threshold,) = _get_cols(
+                line, (1,), ncols=2, expected={0: "T:"}
+            )
+        elif line[:28] == "Neighboring words threshold:":
+            (self._params.threshold,) = _get_cols(
+                line,
+                (3,),
+                ncols=4,
+                expected={0: "Neighboring", 1: "words", 2: "threshold:"},
+            )
+        else:
+            raise ValueError("Unrecognised threshold line:\n%s" % line)
+        self._params.threshold = _safe_int(self._params.threshold)
+
+    def window_size(self, line):
+        if line[:2] == "A:":
+            (self._params.window_size,) = _get_cols(
+                line, (1,), ncols=2, expected={0: "A:"}
+            )
+        elif line[:25] == "Window for multiple hits:":
+            (self._params.window_size,) = _get_cols(
+                line, (4,), ncols=5, expected={0: "Window", 2: "multiple", 3: "hits:"}
+            )
+        else:
+            raise ValueError("Unrecognised window size line:\n%s" % line)
+        self._params.window_size = _safe_int(self._params.window_size)
+
+    def dropoff_1st_pass(self, line):
+        score, bits = _re_search(
+            r"X1: (\d+) \(\s*([0-9,.]+) bits\)",
+            line,
+            "I could not find the dropoff in line\n%s" % line,
+        )
+        self._params.dropoff_1st_pass = _safe_int(score), _safe_float(bits)
+
+    def gap_x_dropoff(self, line):
+        score, bits = _re_search(
+            r"X2: (\d+) \(\s*([0-9,.]+) bits\)",
+            line,
+            "I could not find the gap dropoff in line\n%s" % line,
+        )
+        self._params.gap_x_dropoff = _safe_int(score), _safe_float(bits)
+
+    def gap_x_dropoff_final(self, line):
+        score, bits = _re_search(
+            r"X3: (\d+) \(\s*([0-9,.]+) bits\)",
+            line,
+            "I could not find the gap dropoff final in line\n%s" % line,
+        )
+        self._params.gap_x_dropoff_final = _safe_int(score), _safe_float(bits)
+
+    def gap_trigger(self, line):
+        score, bits = _re_search(
+            r"S1: (\d+) \(\s*([0-9,.]+) bits\)",
+            line,
+            "I could not find the gap trigger in line\n%s" % line,
+        )
+        self._params.gap_trigger = _safe_int(score), _safe_float(bits)
+
+    def blast_cutoff(self, line):
+        score, bits = _re_search(
+            r"S2: (\d+) \(\s*([0-9,.]+) bits\)",
+            line,
+            "I could not find the blast cutoff in line\n%s" % line,
+        )
+        self._params.blast_cutoff = _safe_int(score), _safe_float(bits)
+
+    def end_parameters(self):
+        pass
+
+
+class _BlastConsumer(
+    AbstractConsumer,
+    _HeaderConsumer,
+    _DescriptionConsumer,
+    _AlignmentConsumer,
+    _HSPConsumer,
+    _DatabaseReportConsumer,
+    _ParametersConsumer,
+):
+    # This Consumer inherits from many other consumer classes that handle
+    # the actual dirty work.  An alternative would be to create objects of
+    # those classes and delegate the parsing tasks to them in a
+    # decorator-type pattern.  The disadvantage of that is that the method
+    # names would still need to be resolved in this class.  However, using
+    # a decorator would retain more control in this class (which may or
+    # may not be a bad thing).  In addition, having each sub-consumer as
+    # its own object would prevent this object's dictionary from being
+    # cluttered with members and reduce the chance of member collisions.
+    def __init__(self):
+        self.data = None
+
+    def round(self, line):
+        # Make sure nobody's trying to pass me PSI-BLAST data!
+        raise ValueError("This consumer doesn't handle PSI-BLAST data")
+
+    def start_header(self):
+        self.data = Record.Blast()
+        _HeaderConsumer.start_header(self)
+
+    def end_header(self):
+        _HeaderConsumer.end_header(self)
+        self.data.__dict__.update(self._header.__dict__)
+
+    def end_descriptions(self):
+        self.data.descriptions = self._descriptions
+
+    def end_alignment(self):
+        _AlignmentConsumer.end_alignment(self)
+        if self._alignment.hsps:
+            self.data.alignments.append(self._alignment)
+        if self._multiple_alignment.alignment:
+            self.data.multiple_alignment = self._multiple_alignment
+
+    def end_hsp(self):
+        _HSPConsumer.end_hsp(self)
+        try:
+            self._alignment.hsps.append(self._hsp)
+        except AttributeError:
+            raise ValueError("Found an HSP before an alignment") from None
+
+    def end_database_report(self):
+        _DatabaseReportConsumer.end_database_report(self)
+        self.data.__dict__.update(self._dr.__dict__)
+
+    def end_parameters(self):
+        _ParametersConsumer.end_parameters(self)
+        self.data.__dict__.update(self._params.__dict__)
+
+
+class _PSIBlastConsumer(
+    AbstractConsumer,
+    _HeaderConsumer,
+    _DescriptionConsumer,
+    _AlignmentConsumer,
+    _HSPConsumer,
+    _DatabaseReportConsumer,
+    _ParametersConsumer,
+):
+    def __init__(self):
+        self.data = None
+
+    def start_header(self):
+        self.data = Record.PSIBlast()
+        _HeaderConsumer.start_header(self)
+
+    def end_header(self):
+        _HeaderConsumer.end_header(self)
+        self.data.__dict__.update(self._header.__dict__)
+
+    def start_descriptions(self):
+        self._round = Record.Round()
+        self.data.rounds.append(self._round)
+        _DescriptionConsumer.start_descriptions(self)
+
+    def end_descriptions(self):
+        _DescriptionConsumer.end_descriptions(self)
+        self._round.number = self._roundnum
+        if self._descriptions:
+            self._round.new_seqs.extend(self._descriptions)
+        self._round.reused_seqs.extend(self._model_sequences)
+        self._round.new_seqs.extend(self._nonmodel_sequences)
+        if self._converged:
+            self.data.converged = 1
+
+    def end_alignment(self):
+        _AlignmentConsumer.end_alignment(self)
+        if self._alignment.hsps:
+            self._round.alignments.append(self._alignment)
+        if self._multiple_alignment:
+            self._round.multiple_alignment = self._multiple_alignment
+
+    def end_hsp(self):
+        _HSPConsumer.end_hsp(self)
+        try:
+            self._alignment.hsps.append(self._hsp)
+        except AttributeError:
+            raise ValueError("Found an HSP before an alignment") from None
+
+    def end_database_report(self):
+        _DatabaseReportConsumer.end_database_report(self)
+        self.data.__dict__.update(self._dr.__dict__)
+
+    def end_parameters(self):
+        _ParametersConsumer.end_parameters(self)
+        self.data.__dict__.update(self._params.__dict__)
+
+
+class Iterator:
+    """Iterates over a file of multiple BLAST results.
+
+    Methods:
+    next   Return the next record from the stream, or None.
+
+    """
+
+    def __init__(self, handle, parser=None):
+        """Initialize a new iterator.
+
+        Arguments:
+         - handle is a file-like object.
+         - parser is an optional Parser object to change the results
+           into another form.  If set to None, then the raw contents
+           of the file will be returned.
+        """
+        try:
+            handle.readline
+        except AttributeError:
+            raise ValueError(
+                "I expected a file handle or file-like object, got %s" % type(handle)
+            ) from None
+        self._uhandle = UndoHandle(handle)
+        self._parser = parser
+        self._header = []
+
+    def __next__(self):
+        """Return the next Blast record from the file.
+
+        If no more records, return None.
+        """
+        lines = []
+        query = False
+        while True:
+            line = self._uhandle.readline()
+            if not line:
+                break
+            # If I've reached the next one, then put the line back and stop.
+            if lines and (
+                line.startswith("BLAST")
+                or line.startswith("BLAST", 1)
+                or line.startswith(">> int("5399354557888517312")
+    # 5399354557888517312
+    # >>> int(float("5399354557888517312"))
+    # 5399354557888517120
+    return int(float(str))
+
+
+def _safe_float(str):
+    # Thomas Rosleff Soerensen (rosleff@mpiz-koeln.mpg.de) noted that
+    # float('e-172') does not produce an error on his platform.  Thus,
+    # we need to check the string for this condition.
+
+    # Sometimes BLAST leaves off the '1' in front of an exponent.
+    if str and str[0] in ["E", "e"]:
+        str = "1" + str
+    try:
+        return float(str)
+    except ValueError:
+        # Remove all commas from the string
+        str = str.replace(",", "")
+    # try again.
+    return float(str)
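+# e.g. _safe_float("e-172") returns 1e-172 (the leading '1' is restored)
+# and _safe_float("1,234") returns 1234.0 once the comma is stripped.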
+
+
+class _BlastErrorConsumer(_BlastConsumer):
+    def __init__(self):
+        _BlastConsumer.__init__(self)
+
+    def noevent(self, line):
+        if "Query must be at least wordsize" in line:
+            raise ShortQueryBlastError("Query must be at least wordsize")
+        # Now pass the line back up to the superclass.
+        method = getattr(
+            _BlastConsumer, "noevent", _BlastConsumer.__getattr__(self, "noevent")
+        )
+        method(line)
+
+
+class BlastErrorParser(AbstractParser):
+    """Attempt to catch and diagnose BLAST errors while parsing.
+
+    This utilizes the BlastParser module but adds an additional layer
+    of complexity on top of it by attempting to diagnose ValueErrors
+    that may actually indicate problems during BLAST parsing.
+
+    Current BLAST problems this detects are:
+    - LowQualityBlastError - When BLASTing really low quality sequences
+    (ie. some GenBank entries which are just short stretches of a single
+    nucleotide), BLAST will report an error with the sequence and be
+    unable to search with this. This will lead to a badly formatted
+    BLAST report that the parsers choke on. The parser will convert the
+    ValueError to a LowQualityBlastError and attempt to provide useful
+    information.
+    """
+
+    def __init__(self, bad_report_handle=None):
+        """Initialize a parser that tries to catch BlastErrors.
+
+        Arguments:
+        - bad_report_handle - An optional argument specifying a handle
+        where bad reports should be sent. This would allow you to save
+        all of the bad reports to a file, for instance. If no handle
+        is specified, the bad reports will not be saved.
+        """
+        self._bad_report_handle = bad_report_handle
+
+        # self._b_parser = BlastParser()
+        self._scanner = _Scanner()
+        self._consumer = _BlastErrorConsumer()
+
+    def parse(self, handle):
+        """Parse a handle, attempting to diagnose errors."""
+        results = handle.read()
+
+        try:
+            self._scanner.feed(StringIO(results), self._consumer)
+        except ValueError:
+            # if we have a bad_report_file, save the info to it first
+            if self._bad_report_handle:
+                # send the info to the error handle
+                self._bad_report_handle.write(results)
+
+            # now we want to try and diagnose the error
+            self._diagnose_error(StringIO(results), self._consumer.data)
+
+            # if we got here we can't figure out the problem
+            # so we should pass along the syntax error we got
+            raise
+        return self._consumer.data
+
+    def _diagnose_error(self, handle, data_record):
+        """Attempt to diagnose an error in the passed handle (PRIVATE).
+
+        Arguments:
+        - handle - The handle potentially containing the error
+        - data_record - The data record partially created by the consumer.
+        """
+        line = handle.readline()
+
+        while line:
+            # 'Searchingdone' instead of 'Searching......done' seems
+            # to indicate a failure to perform the BLAST due to
+            # low quality sequence
+            if line.startswith("Searchingdone"):
+                raise LowQualityBlastError(
+                    "Blast failure occurred on query: ", data_record.query
+                )
+            line = handle.readline()
diff --git a/code/lib/Bio/SearchIO/_legacy/ParserSupport.py b/code/lib/Bio/SearchIO/_legacy/ParserSupport.py
new file mode 100644
index 0000000..cea7499
--- /dev/null
+++ b/code/lib/Bio/SearchIO/_legacy/ParserSupport.py
@@ -0,0 +1,380 @@
+# Copyright 1999 by Jeffrey Chang.  All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+
+"""Code to support writing parsers (DEPRECATED).
+
+Classes:
+ - UndoHandle             File object decorator with support for undo-like operations.
+ - AbstractParser         Base class for parsers.
+ - AbstractConsumer       Base class of all Consumers.
+ - TaggingConsumer        Consumer that tags output with its event.  For debugging
+
+Functions:
+ - safe_readline          Read a line from a handle, with check for EOF.
+ - safe_peekline          Peek at next line, with check for EOF.
+ - read_and_call          Read a line from a handle and pass it to a method.
+ - read_and_call_while    Read many lines, as long as a condition is met.
+ - read_and_call_until    Read many lines, until a condition is met.
+ - attempt_read_and_call  Like read_and_call, but forgiving of errors.
+ - is_blank_line          Test whether a line is blank.
+
+"""
+
+import sys
+from io import StringIO
+
+from abc import ABC, abstractmethod
+
+
+class UndoHandle:
+    """A Python handle that adds functionality for saving lines.
+
+    Saves lines in a LIFO fashion.
+    """
+
+    def __init__(self, handle):
+        """Initialize the class."""
+        self._handle = handle
+        self._saved = []
+        try:
+            # If wrapping an online handle, this is nice to have:
+            self.url = handle.url
+        except AttributeError:
+            pass
+
+    def __iter__(self):
+        """Iterate over the lines in the File."""
+        return self
+
+    def __next__(self):
+        """Return the next line."""
+        next = self.readline()
+        if not next:
+            raise StopIteration
+        return next
+
+    def readlines(self, *args, **keywds):
+        """Read all the lines from the file as a list of strings."""
+        lines = self._saved + self._handle.readlines(*args, **keywds)
+        self._saved = []
+        return lines
+
+    def readline(self, *args, **keywds):
+        """Read the next line from the file as string."""
+        if self._saved:
+            line = self._saved.pop(0)
+        else:
+            line = self._handle.readline(*args, **keywds)
+        return line
+
+    def read(self, size=-1):
+        """Read the File."""
+        if size == -1:
+            saved = "".join(self._saved)
+            self._saved[:] = []
+        else:
+            saved = ""
+            while size > 0 and self._saved:
+                if len(self._saved[0]) <= size:
+                    size = size - len(self._saved[0])
+                    saved = saved + self._saved.pop(0)
+                else:
+                    saved = saved + self._saved[0][:size]
+                    self._saved[0] = self._saved[0][size:]
+                    size = 0
+        return saved + self._handle.read(size)
+
+    def saveline(self, line):
+        """Store a line in the cache memory for later use.
+
+        This acts to undo a readline, reflecting the name of the class: UndoHandle.
+        """
+        if line:
+            self._saved = [line] + self._saved
+
+    def peekline(self):
+        """Return the next line in the file, but do not move forward though the file."""
+        if self._saved:
+            line = self._saved[0]
+        else:
+            line = self._handle.readline()
+            self.saveline(line)
+        return line
+
+    def tell(self):
+        """Return the current position of the file read/write pointer within the File."""
+        return self._handle.tell() - sum(len(line) for line in self._saved)
+
+    def seek(self, *args):
+        """Set the current position at the offset specified."""
+        self._saved = []
+        self._handle.seek(*args)
+
+    def __getattr__(self, attr):
+        """Return File attribute."""
+        return getattr(self._handle, attr)
+
+    def __enter__(self):
+        """Call special method when opening the file using a with-statement."""
+        return self
+
+    def __exit__(self, type, value, traceback):
+        """Call special method when closing the file using a with-statement."""
+        self._handle.close()
+
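+# A quick illustration of the undo behaviour (hypothetical snippet):
+#     h = UndoHandle(StringIO("one\ntwo\n"))
+#     h.peekline()         # 'one\n' -- the line is not consumed
+#     h.readline()         # 'one\n' -- the same line is returned again
+#     h.saveline("one\n")  # push it back; the next readline re-reads it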
+
+class AbstractParser(ABC):
+    """Abstract base class for other parsers."""
+
+    @abstractmethod
+    def parse(self, handle):
+        """Provision for parsing a file handle."""
+        raise NotImplementedError
+
+    def parse_str(self, string):
+        """Make string a handle, so it can be taken by parse."""
+        return self.parse(StringIO(string))
+
+    def parse_file(self, filename):
+        """Parse a file, open the file as handle so it can be taken by parse."""
+        with open(filename) as h:
+            retval = self.parse(h)
+        return retval
+
+
+class AbstractConsumer:
+    """Base class for other Consumers.
+
+    Derive Consumers from this class and implement appropriate
+    methods for each event that you want to receive.
+
+    """
+
+    # Optionally implement in the sub-class
+    def _unhandled_section(self):
+        pass
+
+    # Optionally implement in the sub-class
+    def _unhandled(self, data):
+        pass
+
+    def __getattr__(self, attr):
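+        # Events arrive as attribute lookups: start_*/end_* section
+        # markers fall back to _unhandled_section, anything else (an
+        # individual line) to _unhandled, so subclasses implement only
+        # the events they care about.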
+        if attr[:6] == "start_" or attr[:4] == "end_":
+            method = self._unhandled_section
+        else:
+            method = self._unhandled
+        return method
+
+
+class TaggingConsumer(AbstractConsumer):
+    """Debugging consumer which tags data with the event and logs it.
+
+    This is a Consumer that tags the data stream with the event and
+    prints it to a handle.  Useful for debugging.
+
+    """
+
+    def __init__(self, handle=None, colwidth=15, maxwidth=80):
+        """Initialize.
+
+        Arguments:
+         - handle to log to, defaults to ``sys.stdout``
+         - colwidth for logging to the handle
+         - maxwidth for truncation when logging
+
+        """
+        # I can't assign sys.stdout to handle in the argument list.
+        # If I do that, handle will be assigned the value of sys.stdout
+        # the first time this function is called.  This will fail if
+        # the user has assigned sys.stdout to some other file, which may
+        # be closed or invalid at a later time.
+        if handle is None:
+            handle = sys.stdout
+        self._handle = handle
+        self._colwidth = colwidth
+        self._maxwidth = maxwidth
+
+    def unhandled_section(self):
+        """Tag an unhandled section."""
+        self._print_name("unhandled_section")
+
+    def unhandled(self, data):
+        """Tag unhandled data."""
+        self._print_name("unhandled", data)
+
+    def _print_name(self, name, data=None):
+        if data is None:
+            # Write the name of a section.
+            self._handle.write("%s %s\n" % ("*" * self._colwidth, name))
+        else:
+            # Write the tag and line.
+            self._handle.write(
+                "%-*s: %s\n"
+                % (
+                    self._colwidth,
+                    name[: self._colwidth],
+                    data[: self._maxwidth - self._colwidth - 2].rstrip(),
+                )
+            )
+
+    def __getattr__(self, attr):
+        if attr[:6] == "start_" or attr[:4] == "end_":
+            method = lambda a=attr, s=self: s._print_name(a)  # noqa: E731
+        else:
+            method = lambda x, a=attr, s=self: s._print_name(a, x)  # noqa: E731
+        return method
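+
+# e.g. (illustrative) feed a report through a scanner with a tagging
+# consumer to print each event as it is emitted:
+#     scanner.feed(uhandle, TaggingConsumer())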
+
+
+def read_and_call(uhandle, method, **keywds):
+    """Read line and pass it to the method.
+
+    Read a line from uhandle, check it, and pass it to the method.
+    Raises a ValueError if the line does not pass the checks.
+
+    start, end, contains, blank, and has_re specify optional conditions
+    that the line must pass.  start and end specify what the line must
+    begin or end with (not counting EOL characters).  contains
+    specifies a substring that must be found in the line.  If blank
+    is a true value, then the line must be blank.  has_re should be
+    a regular expression object with a pattern that the line must match
+    somewhere.
+
+    """
+    line = safe_readline(uhandle)
+    errmsg = _fails_conditions(line, **keywds)
+    if errmsg is not None:
+        raise ValueError(errmsg)
+    method(line)
+
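+# Typical usage mirrors the BLAST scanner above (illustrative):
+#     read_and_call(uhandle, consumer.posted_date, start="    Posted")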
+
+def read_and_call_while(uhandle, method, **keywds):
+    """Read line and pass it to the method while condition is true.
+
+    Read a line from uhandle and pass it to the method as long as
+    some condition is true.  Returns the number of lines that were read.
+
+    See the docstring for read_and_call for a description of the parameters.
+
+    """
+    nlines = 0
+    while True:
+        line = safe_readline(uhandle)
+        # If I've failed the condition, then stop reading the line.
+        if _fails_conditions(line, **keywds):
+            uhandle.saveline(line)
+            break
+        method(line)
+        nlines = nlines + 1
+    return nlines
+
+
+def read_and_call_until(uhandle, method, **keywds):
+    """Read line and pass it to the method until condition is true.
+
+    Read a line from uhandle and pass it to the method until
+    some condition is true.  Returns the number of lines that were read.
+
+    See the docstring for read_and_call for a description of the parameters.
+
+    """
+    nlines = 0
+    while True:
+        line = safe_readline(uhandle)
+        # If I've met the condition, then stop reading the line.
+        if not _fails_conditions(line, **keywds):
+            uhandle.saveline(line)
+            break
+        method(line)
+        nlines = nlines + 1
+    return nlines
+
+
+def attempt_read_and_call(uhandle, method, **keywds):
+    """Attempt read line and call method.
+
+    Similar to read_and_call, but returns a boolean specifying
+    whether the line has passed the checks.  Does not raise an
+    exception if the checks fail (though safe_readline may still
+    raise ValueError at the end of the stream).
+
+    See docs for read_and_call for a description of the function
+    arguments.
+
+    """
+    line = safe_readline(uhandle)
+    passed = not _fails_conditions(line, **keywds)
+    if passed:
+        method(line)
+    else:
+        uhandle.saveline(line)
+    return passed
+
+
+def _fails_conditions(
+    line, start=None, end=None, contains=None, blank=None, has_re=None
+):
+    if start is not None:
+        if line[: len(start)] != start:
+            return "Line does not start with '%s':\n%s" % (start, line)
+    if end is not None:
+        if line.rstrip()[-len(end) :] != end:
+            return "Line does not end with '%s':\n%s" % (end, line)
+    if contains is not None:
+        if contains not in line:
+            return "Line does not contain '%s':\n%s" % (contains, line)
+    if blank is not None:
+        if blank:
+            if not is_blank_line(line):
+                return "Expected blank line, but got:\n%s" % line
+        else:
+            if is_blank_line(line):
+                return "Expected non-blank line, but got a blank one"
+    if has_re is not None:
+        if has_re.search(line) is None:
+            return "Line does not match regex '%s':\n%s" % (has_re.pattern, line)
+    return None
+
+
+def is_blank_line(line, allow_spaces=False):
+    """Check if a line is blank.
+
+    Return whether a line is blank.  allow_spaces specifies whether to
+    allow whitespace in a blank line.  A true value signifies that a
+    line containing whitespace as well as end-of-line characters
+    should be considered blank.
+
+    """
+    if not line:
+        return True
+    if allow_spaces:
+        return line.rstrip() == ""
+    return line[0] == "\n" or line[0] == "\r"
+
+
+def safe_readline(handle):
+    """Read a line, otherwise raises ValueError.
+
+    Read a line from an UndoHandle and return it.  If there are no more
+    lines to read, I will raise a ValueError.
+
+    """
+    line = handle.readline()
+    if not line:
+        raise ValueError("Unexpected end of stream.")
+    return line
+
+
+def safe_peekline(handle):
+    """Peek at the next line if present, otherwise raises ValueError.
+
+    Peek at the next line in an UndoHandle and return it.  If there are no
+    more lines to peek, I will raise a ValueError.
+
+    """
+    line = handle.peekline()
+    if not line:
+        raise ValueError("Unexpected end of stream.")
+    return line
diff --git a/code/lib/Bio/SearchIO/_legacy/__init__.py b/code/lib/Bio/SearchIO/_legacy/__init__.py
new file mode 100644
index 0000000..618df08
--- /dev/null
+++ b/code/lib/Bio/SearchIO/_legacy/__init__.py
@@ -0,0 +1,5 @@
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Legacy functionalities from other parts of Biopython used by SearchIO."""
diff --git a/code/lib/Bio/SearchIO/_legacy/__pycache__/NCBIStandalone.cpython-37.pyc b/code/lib/Bio/SearchIO/_legacy/__pycache__/NCBIStandalone.cpython-37.pyc
new file mode 100644
index 0000000..ada6d15
Binary files /dev/null and b/code/lib/Bio/SearchIO/_legacy/__pycache__/NCBIStandalone.cpython-37.pyc differ
diff --git a/code/lib/Bio/SearchIO/_legacy/__pycache__/ParserSupport.cpython-37.pyc b/code/lib/Bio/SearchIO/_legacy/__pycache__/ParserSupport.cpython-37.pyc
new file mode 100644
index 0000000..b72fb05
Binary files /dev/null and b/code/lib/Bio/SearchIO/_legacy/__pycache__/ParserSupport.cpython-37.pyc differ
diff --git a/code/lib/Bio/SearchIO/_legacy/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/SearchIO/_legacy/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000..e9759bc
Binary files /dev/null and b/code/lib/Bio/SearchIO/_legacy/__pycache__/__init__.cpython-37.pyc differ
diff --git a/code/lib/Bio/SearchIO/_model/__init__.py b/code/lib/Bio/SearchIO/_model/__init__.py
new file mode 100644
index 0000000..c81f503
--- /dev/null
+++ b/code/lib/Bio/SearchIO/_model/__init__.py
@@ -0,0 +1,59 @@
+# Copyright 2012 by Wibowo Arindrarto.  All rights reserved.
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Bio.SearchIO objects to model similarity search program outputs.
+
+The SearchIO object model consists of a hierarchy of four nested objects:
+
+    * QueryResult, to represent a search query.
+
+      This is the top-level object returned by the main SearchIO ``parse`` and
+      ``read`` functions. QueryResult objects may contain zero or more Hit
+      objects, each accessible by its ID string (like in Python dictionaries)
+      or integer index (like in Python lists).
+
+    * Hit, to represent a database entry containing a full or partial sequence
+      match with the query sequence.
+
+      Hit objects contain one or more HSP objects, each accessible by its integer
+      index. They behave very similarly to a Python list.
+
+    * HSP, to represent a region of significant alignment(s) between the query
+      and hit sequences.
+
+      HSP objects contain one or more HSPFragment objects, each accessible by
+      its integer index. In most cases, the HSP objects are where the bulk of
+      search result statistics (e.g. e-value, bitscore) are stored. Like Hit
+      objects, HSPs also behave very similarly to a Python list.
+
+    * HSPFragment, to represent a single contiguous alignment between the query
+      and hit sequences.
+
+      HSPFragment objects may store hit and query sequences resulting from the
+      sequence search. If present, these sequences are stored as SeqRecord
+      objects (see SeqRecord). If both of them are present, HSPFragment will
+      create a MultipleSeqAlignment object from both sequences.
+
+      Most search programs only have HSPs with one HSPFragment in them, making
+      these two objects inseparable. However, there are programs (e.g. BLAT and
+      Exonerate) which may have more than one HSPFragment object in any given
+      HSP. If you are not using these programs, you can safely consider HSP and
+      HSPFragment as a single union.
+
+"""
+
+from .query import QueryResult
+from .hit import Hit
+from .hsp import HSP, HSPFragment
+
+
+__all__ = ("QueryResult", "Hit", "HSP", "HSPFragment")
+
+
+# if not used as a module, run the doctest
+if __name__ == "__main__":
+    from Bio._utils import run_doctest
+
+    run_doctest()
diff --git a/code/lib/Bio/SearchIO/_model/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/SearchIO/_model/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000..6efc357
Binary files /dev/null and b/code/lib/Bio/SearchIO/_model/__pycache__/__init__.cpython-37.pyc differ
diff --git a/code/lib/Bio/SearchIO/_model/__pycache__/_base.cpython-37.pyc b/code/lib/Bio/SearchIO/_model/__pycache__/_base.cpython-37.pyc
new file mode 100644
index 0000000..fb5f3e0
Binary files /dev/null and b/code/lib/Bio/SearchIO/_model/__pycache__/_base.cpython-37.pyc differ
diff --git a/code/lib/Bio/SearchIO/_model/__pycache__/hit.cpython-37.pyc b/code/lib/Bio/SearchIO/_model/__pycache__/hit.cpython-37.pyc
new file mode 100644
index 0000000..898a65a
Binary files /dev/null and b/code/lib/Bio/SearchIO/_model/__pycache__/hit.cpython-37.pyc differ
diff --git a/code/lib/Bio/SearchIO/_model/__pycache__/hsp.cpython-37.pyc b/code/lib/Bio/SearchIO/_model/__pycache__/hsp.cpython-37.pyc
new file mode 100644
index 0000000..e11c670
Binary files /dev/null and b/code/lib/Bio/SearchIO/_model/__pycache__/hsp.cpython-37.pyc differ
diff --git a/code/lib/Bio/SearchIO/_model/__pycache__/query.cpython-37.pyc b/code/lib/Bio/SearchIO/_model/__pycache__/query.cpython-37.pyc
new file mode 100644
index 0000000..d74679d
Binary files /dev/null and b/code/lib/Bio/SearchIO/_model/__pycache__/query.cpython-37.pyc differ
diff --git a/code/lib/Bio/SearchIO/_model/_base.py b/code/lib/Bio/SearchIO/_model/_base.py
new file mode 100644
index 0000000..001755a
--- /dev/null
+++ b/code/lib/Bio/SearchIO/_model/_base.py
@@ -0,0 +1,68 @@
+# Copyright 2012 by Wibowo Arindrarto.  All rights reserved.
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Abstract base classes for the SearchIO object model."""
+
+
+from Bio.SearchIO._utils import getattr_str
+
+
+class _BaseSearchObject:
+    """Abstract class for SearchIO objects."""
+
+    _NON_STICKY_ATTRS = ()
+
+    def _transfer_attrs(self, obj):
+        """Transfer instance attributes to the given object (PRIVATE).
+
+        This method is used to transfer attributes set externally (for example
+        using ``setattr``) to a new object created from this one (for example
+        from slicing).
+
+        The reason this method is necessary is because different parsers will
+        set different attributes for each QueryResult, Hit, HSP, or HSPFragment
+        object, depending on the attributes they found in the search output
+        file. Ideally, we want these attributes to 'stick' with any new instance
+        object created from the original one.
+
+        """
+        # transfer all attributes except those listed in _NON_STICKY_ATTRS
+        for attr in self.__dict__:
+            if attr not in self._NON_STICKY_ATTRS:
+                setattr(obj, attr, self.__dict__[attr])
+
+
+class _BaseHSP(_BaseSearchObject):
+    """Abstract base class for HSP objects."""
+
+    def _str_hsp_header(self):
+        """Print the alignment header info (PRIVATE)."""
+        lines = []
+        # set query id line
+        qid_line = "      Query: %s %s" % (self.query_id, self.query_description)
+        qid_line = qid_line[:77] + "..." if len(qid_line) > 80 else qid_line
+        # set hit id line
+        hid_line = "        Hit: %s %s" % (self.hit_id, self.hit_description)
+        hid_line = hid_line[:77] + "..." if len(hid_line) > 80 else hid_line
+        lines.append(qid_line)
+        lines.append(hid_line)
+
+        # coordinates
+        query_start = getattr_str(self, "query_start")
+        query_end = getattr_str(self, "query_end")
+        hit_start = getattr_str(self, "hit_start")
+        hit_end = getattr_str(self, "hit_end")
+
+        # strands
+        try:
+            qstrand = self.query_strand
+            hstrand = self.hit_strand
+        except ValueError:
+            qstrand = self.query_strand_all[0]
+            hstrand = self.hit_strand_all[0]
+        lines.append("Query range: [%s:%s] (%r)" % (query_start, query_end, qstrand))
+        lines.append("  Hit range: [%s:%s] (%r)" % (hit_start, hit_end, hstrand))
+
+        return "\n".join(lines)
diff --git a/code/lib/Bio/SearchIO/_model/hit.py b/code/lib/Bio/SearchIO/_model/hit.py
new file mode 100644
index 0000000..bd027db
--- /dev/null
+++ b/code/lib/Bio/SearchIO/_model/hit.py
@@ -0,0 +1,463 @@
+# Copyright 2012 by Wibowo Arindrarto.  All rights reserved.
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Bio.SearchIO object to model a single database hit."""
+
+
+from itertools import chain
+
+from Bio.SearchIO._utils import allitems, optionalcascade, getattr_str
+
+from ._base import _BaseSearchObject
+from .hsp import HSP
+
+
+class Hit(_BaseSearchObject):
+    """Class representing a single database hit of a search result.
+
+    Hit objects are the second-level container in the SearchIO module. They
+    are the objects contained within a QueryResult (see QueryResult). They
+    themselves are containers for HSP objects and will contain at least one
+    HSP.
+
+    To have a quick look at a Hit and its contents, invoke ``print`` on it::
+
+        >>> from Bio import SearchIO
+        >>> qresult = next(SearchIO.parse('Blast/mirna.xml', 'blast-xml'))
+        >>> hit = qresult[3]
+        >>> print(hit)
+        Query: 33211
+               mir_1
+          Hit: gi|301171322|ref|NR_035857.1| (86)
+               Pan troglodytes microRNA mir-520c (MIR520C), microRNA
+         HSPs: ----  --------  ---------  ------  ---------------  ---------------------
+                  #   E-value  Bit score    Span      Query range              Hit range
+               ----  --------  ---------  ------  ---------------  ---------------------
+                  0   8.9e-20     100.47      60           [1:61]                [13:73]
+                  1   3.3e-06      55.39      60           [0:60]                [13:73]
+
+    You can invoke ``len`` on a Hit object to see how many HSP objects it contains::
+
+        >>> len(hit)
+        2
+
+    Hit objects behave very similarly to Python lists. You can retrieve the HSP
+    object inside a Hit using the HSP's integer index. Hit objects can also be
+    sliced, which will return a new Hit object containing only the sliced HSPs::
+
+        # HSP items inside the Hit can be retrieved using its integer index
+        >>> hit[0]
+        HSP(hit_id='gi|301171322|ref|NR_035857.1|', query_id='33211', 1 fragments)
+
+        # slicing returns a new Hit
+        >>> hit
+        Hit(id='gi|301171322|ref|NR_035857.1|', query_id='33211', 2 hsps)
+        >>> hit[:1]
+        Hit(id='gi|301171322|ref|NR_035857.1|', query_id='33211', 1 hsps)
+        >>> print(hit[1:])
+        Query: 33211
+               mir_1
+          Hit: gi|301171322|ref|NR_035857.1| (86)
+               Pan troglodytes microRNA mir-520c (MIR520C), microRNA
+         HSPs: ----  --------  ---------  ------  ---------------  ---------------------
+                  #   E-value  Bit score    Span      Query range              Hit range
+               ----  --------  ---------  ------  ---------------  ---------------------
+                  0   3.3e-06      55.39      60           [0:60]                [13:73]
+
+    Hit objects provide ``filter`` and ``map`` methods, which are analogous to
+    Python's built-in ``filter`` and ``map`` except that they return a new Hit
+    object instead of a list.
+
+    Here is an example of using ``filter`` to select for HSPs whose e-value is
+    less than 1e-10::
+
+        >>> evalue_filter = lambda hsp: hsp.evalue < 1e-10
+        >>> filtered_hit = hit.filter(evalue_filter)
+        >>> len(hit)
+        2
+        >>> len(filtered_hit)
+        1
+        >>> print(filtered_hit)
+        Query: 33211
+               mir_1
+          Hit: gi|301171322|ref|NR_035857.1| (86)
+               Pan troglodytes microRNA mir-520c (MIR520C), microRNA
+         HSPs: ----  --------  ---------  ------  ---------------  ---------------------
+                  #   E-value  Bit score    Span      Query range              Hit range
+               ----  --------  ---------  ------  ---------------  ---------------------
+                  0   8.9e-20     100.47      60           [1:61]                [13:73]
+
+    There are also other methods which are counterparts of Python lists' methods
+    with the same names: ``append``, ``index``, ``pop``, and ``sort``. Consult their
+    respective documentation for more details and examples of their usage.
+
+    """
+
+    # attributes we don't want to transfer when creating a new Hit object
+    # from this one
+    _NON_STICKY_ATTRS = ("_items",)
+
+    def __init__(self, hsps=(), id=None, query_id=None):
+        """Initialize a Hit object.
+
+        :param hsps: HSP objects contained in the Hit object
+        :type hsps: iterable yielding HSP
+        :param id: hit ID
+        :type id: string
+        :param query_id: query ID
+        :type query_id: string
+
+        If multiple HSP objects are used for initialization, they must all
+        have the same ``query_id``, ``query_description``, ``hit_id``, and
+        ``hit_description`` properties.
+        """
+        # default attribute values
+        self._id = id
+        self._id_alt = []
+        self._query_id = query_id
+        self._description = None
+        self._description_alt = []
+        self._query_description = None
+        self.attributes = {}
+        self.dbxrefs = []
+
+        # TODO - Move this into the for loop below in case
+        # hsps is a single use iterator?
+        for attr in ("query_id", "query_description", "hit_id", "hit_description"):
+            # HACK: setting the if clause to '> 1' allows for empty hit objects.
+            # This makes it easier to work with file formats with unpredictable
+            # hit-hsp ordering. The empty hit object itself is nonfunctional,
+            # however, since all its cascading properties are empty.
+            if len({getattr(hsp, attr) for hsp in hsps}) > 1:
+                raise ValueError(
+                    "Hit object can not contain HSPs with more than one %s." % attr
+                )
+
+        self._items = []
+        for hsp in hsps:
+            # validate each HSP
+            self._validate_hsp(hsp)
+            # and store it in this Hit
+            self.append(hsp)
+
+    def __repr__(self):
+        """Return string representation of Hit object."""
+        return "Hit(id=%r, query_id=%r, %r hsps)" % (self.id, self.query_id, len(self))
+
+    def __iter__(self):
+        """Iterate over hsps."""
+        return iter(self.hsps)
+
+    def __len__(self):
+        """Return number of hsps."""
+        return len(self.hsps)
+
+    def __bool__(self):
+        """Return True if there are hsps."""
+        return bool(self.hsps)
+
+    def __contains__(self, hsp):
+        """Return True if hsp in items."""
+        return hsp in self._items
+
+    def __str__(self):
+        """Return a human readable summary of the Hit object."""
+        lines = []
+
+        # set query id line
+        qid_line = "Query: %s" % self.query_id
+        lines.append(qid_line)
+        if self.query_description:
+            line = "       %s" % self.query_description
+            line = line[:77] + "..." if len(line) > 80 else line
+            lines.append(line)
+
+        # set hit id line
+        hid_line = "  Hit: %s" % self.id
+        try:
+            seq_len = self.seq_len
+        except AttributeError:
+            pass
+        else:
+            hid_line += " (%i)" % seq_len
+        lines.append(hid_line)
+        if self.description:
+            line = "       %s" % self.description
+            line = line[:77] + "..." if len(line) > 80 else line
+            lines.append(line)
+
+        # set attributes lines
+        for key, value in sorted(self.attributes.items()):
+            lines.append(" %s: %s" % (key, value))
+
+        # set dbxrefs line
+        if self.dbxrefs:
+            lines.append("Database cross-references: " + ", ".join(self.dbxrefs))
+
+        # set hsp line and table
+        if not self.hsps:
+            lines.append(" HSPs: ?")
+        else:
+            lines.append(
+                " HSPs: %s  %s  %s  %s  %s  %s"
+                % ("-" * 4, "-" * 8, "-" * 9, "-" * 6, "-" * 15, "-" * 21)
+            )
+            pattern = "%11s  %8s  %9s  %6s  %15s  %21s"
+            lines.append(
+                pattern
+                % ("#", "E-value", "Bit score", "Span", "Query range", "Hit range")
+            )
+            lines.append(
+                pattern % ("-" * 4, "-" * 8, "-" * 9, "-" * 6, "-" * 15, "-" * 21)
+            )
+            for idx, hsp in enumerate(self.hsps):
+                # evalue
+                evalue = getattr_str(hsp, "evalue", fmt="%.2g")
+                # bitscore
+                bitscore = getattr_str(hsp, "bitscore", fmt="%.2f")
+                # alignment length
+                aln_span = getattr_str(hsp, "aln_span")
+                # query region
+                query_start = getattr_str(hsp, "query_start")
+                query_end = getattr_str(hsp, "query_end")
+                query_range = "[%s:%s]" % (query_start, query_end)
+                # max column length is 15
+                query_range = (
+                    query_range[:13] + "~]" if len(query_range) > 15 else query_range
+                )
+                # hit region
+                hit_start = getattr_str(hsp, "hit_start")
+                hit_end = getattr_str(hsp, "hit_end")
+                hit_range = "[%s:%s]" % (hit_start, hit_end)
+                hit_range = hit_range[:19] + "~]" if len(hit_range) > 21 else hit_range
+                # append the hsp row
+                lines.append(
+                    pattern % (idx, evalue, bitscore, aln_span, query_range, hit_range)
+                )
+
+        return "\n".join(lines)
+
+    def __getitem__(self, idx):
+        """Return the HSP object at the given index."""
+        # if key is slice, return a new Hit instance
+        if isinstance(idx, slice):
+            obj = self.__class__(self.hsps[idx])
+            self._transfer_attrs(obj)
+            return obj
+        return self._items[idx]
+
+    def __setitem__(self, idx, hsps):
+        """Assign hsps to index idx."""
+        # handle case if hsps is a list of hsp
+        if isinstance(hsps, (list, tuple)):
+            for hsp in hsps:
+                self._validate_hsp(hsp)
+        else:
+            self._validate_hsp(hsps)
+
+        self._items[idx] = hsps
+
+    def __delitem__(self, idx):
+        """Delete item of index idx."""
+        del self._items[idx]
+
+    # hsp properties #
+    def _validate_hsp(self, hsp):
+        """Validate an HSP object (PRIVATE).
+
+        Valid HSP objects have the same hit_id as the Hit object ID and the
+        same query_id as the Hit object's query_id.
+
+        """
+        if not isinstance(hsp, HSP):
+            raise TypeError("Hit objects can only contain HSP objects.")
+        # HACK: to make validation during __init__ work
+        if self._items:
+            if self.id is not None:
+                if hsp.hit_id != self.id:
+                    raise ValueError(
+                        "Expected HSP with hit ID %r, found %r instead."
+                        % (self.id, hsp.hit_id)
+                    )
+            else:
+                self.id = hsp.hit_id
+
+            if self.description is not None:
+                if hsp.hit_description != self.description:
+                    raise ValueError(
+                        "Expected HSP with hit description %r, found %r instead."
+                        % (self.description, hsp.hit_description)
+                    )
+            else:
+                self.description = hsp.hit_description
+
+            if self.query_id is not None:
+                if hsp.query_id != self.query_id:
+                    raise ValueError(
+                        "Expected HSP with query ID %r, found %r instead."
+                        % (self.query_id, hsp.query_id)
+                    )
+            else:
+                self.query_id = hsp.query_id
+
+            if self.query_description is not None:
+                if hsp.query_description != self.query_description:
+                    raise ValueError(
+                        "Expected HSP with query description %r, found %r instead."
+                        % (self.query_description, hsp.query_description)
+                    )
+            else:
+                self.query_description = hsp.query_description
+
+    # properties #
+    description = optionalcascade(
+        "_description", "hit_description", """Hit description"""
+    )
+    query_description = optionalcascade(
+        "_query_description",
+        "query_description",
+        """Description of the query that produced the hit""",
+    )
+    id = optionalcascade("_id", "hit_id", """Hit ID string.""")
+    query_id = optionalcascade(
+        "_query_id", "query_id", """ID string of the query that produced the hit"""
+    )
+    # returns all hsps
+    hsps = allitems(doc="""HSP objects contained in the Hit""")
+
+    @property
+    def id_all(self):
+        """Alternative ID(s) of the Hit."""
+        return [self.id] + self._id_alt
+
+    @property
+    def description_all(self):
+        """Alternative descriptions of the Hit."""
+        return [self.description] + self._description_alt
+
+    @property
+    def fragments(self):
+        """Access the HSPFragment objects contained in the Hit."""
+        return list(chain(*self._items))
+
+    # public methods #
+    def append(self, hsp):
+        """Add a HSP object to the end of Hit.
+
+        Parameters
+        hsp -- HSP object to append.
+
+        Any HSP object appended must have the same ``hit_id`` property as the
+        Hit object's ``id`` property and the same ``query_id`` property as the
+        Hit object's ``query_id`` property.
+
+        """
+        self._validate_hsp(hsp)
+        self._items.append(hsp)
+
+    def filter(self, func=None):
+        """Create new Hit object whose HSP objects pass the filter function.
+
+        :param func: function for filtering
+        :type func: callable, accepts HSP, returns bool
+
+        ``filter`` is analogous to Python's built-in ``filter`` function, except
+        that instead of returning a list it returns a ``Hit`` object. Here is an
+        example of using ``filter`` to select for HSPs having bitscores bigger
+        than 60::
+
+            >>> from Bio import SearchIO
+            >>> qresult = next(SearchIO.parse('Blast/mirna.xml', 'blast-xml'))
+            >>> hit = qresult[3]
+            >>> bitscore_filter = lambda hsp: hsp.bitscore > 60
+            >>> filtered_hit = hit.filter(bitscore_filter)
+            >>> len(hit)
+            2
+            >>> len(filtered_hit)
+            1
+            >>> print(filtered_hit)
+            Query: 33211
+                   mir_1
+              Hit: gi|301171322|ref|NR_035857.1| (86)
+                   Pan troglodytes microRNA mir-520c (MIR520C), microRNA
+             HSPs: ----  --------  ---------  ------  ---------------  ---------------------
+                      #   E-value  Bit score    Span      Query range              Hit range
+                   ----  --------  ---------  ------  ---------------  ---------------------
+                      0   8.9e-20     100.47      60           [1:61]                [13:73]
+
+        """
+        hsps = list(filter(func, self.hsps))
+        if hsps:
+            obj = self.__class__(hsps)
+            self._transfer_attrs(obj)
+            return obj
+
+    def index(self, hsp):
+        """Return the index of a given HSP object, zero-based.
+
+        :param hsp: object to look up
+        :type hsp: HSP
+
+        """
+        return self._items.index(hsp)
+
+    def map(self, func=None):
+        """Create new Hit object, mapping the given function to its HSPs.
+
+        :param func: function for mapping
+        :type func: callable, accepts HSP, returns HSP
+
+        ``map`` is analogous to Python's built-in ``map`` function: the given
+        function is applied to all HSPs contained in the Hit object, and a new
+        Hit object is returned.
+
+        """
+        if func is not None:
+            hsps = [func(x) for x in self.hsps]  # builds a new list
+        else:
+            hsps = self.hsps[:]
+        if hsps:
+            obj = self.__class__(hsps)
+            self._transfer_attrs(obj)
+            return obj
+
+    def pop(self, index=-1):
+        """Remove and returns the HSP object at the specified index.
+
+        :param index: index of HSP object to pop
+        :type index: int
+
+        """
+        return self._items.pop(index)
+
+    def sort(self, key=None, reverse=False, in_place=True):
+        """Sort the HSP objects.
+
+        :param key: sorting function
+        :type key: callable, accepts HSP, returns key for sorting
+        :param reverse: whether to reverse the sorting results or not
+        :type reverse: bool
+        :param in_place: whether to sort in place or not
+        :type in_place: bool
+
+        ``sort`` defaults to sorting in-place, to mimic Python's ``list.sort``
+        method. If you set the ``in_place`` argument to False, it will instead
+        return a new, sorted Hit object and keep the initial one unsorted.
+
+        """
+        if in_place:
+            self._items.sort(key=key, reverse=reverse)
+        else:
+            hsps = self.hsps[:]
+            hsps.sort(key=key, reverse=reverse)
+            obj = self.__class__(hsps)
+            self._transfer_attrs(obj)
+            return obj
+
+
+# if not used as a module, run the doctest
+if __name__ == "__main__":
+    from Bio._utils import run_doctest
+
+    run_doctest()
diff --git a/code/lib/Bio/SearchIO/_model/hsp.py b/code/lib/Bio/SearchIO/_model/hsp.py
new file mode 100644
index 0000000..ce15ee2
--- /dev/null
+++ b/code/lib/Bio/SearchIO/_model/hsp.py
@@ -0,0 +1,1230 @@
+# Copyright 2012 by Wibowo Arindrarto.  All rights reserved.
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Bio.SearchIO objects to model high scoring regions between query and hit."""
+
+import warnings
+from operator import ge, le
+
+from Bio import BiopythonWarning
+from Bio.Align import MultipleSeqAlignment
+from Bio.Seq import Seq
+from Bio.SeqRecord import SeqRecord
+
+from Bio.SearchIO._utils import (
+    singleitem,
+    allitems,
+    fullcascade,
+    fragcascade,
+    getattr_str,
+)
+
+from ._base import _BaseHSP
+
+
+class HSP(_BaseHSP):
+    """Class representing high-scoring region(s) between query and hit.
+
+    HSP (high-scoring pair) objects are contained by Hit objects (see Hit).
+    In most cases, HSP objects store the bulk of the statistics and results
+    (e.g. e-value, bitscores, query sequence, etc.) produced by a search
+    program.
+
+    Depending on the search output file format, a given HSP will contain one
+    or more HSPFragment object(s). Examples of search programs that produce HSP
+    with one HSPFragments are BLAST, HMMER, and FASTA. Other programs such as
+    BLAT or Exonerate may produce HSPs containing more than one HSPFragment.
+    However, their native terminologies may differ: in BLAT these fragments
+    are called 'blocks' while in Exonerate they are called exons or NER.
+
+    Here are examples from each type of HSP. The first one comes from a BLAST
+    search::
+
+        >>> from Bio import SearchIO
+        >>> blast_qresult = next(SearchIO.parse('Blast/mirna.xml', 'blast-xml'))
+        >>> blast_hsp = blast_qresult[1][0]     # the first HSP from the second hit
+        >>> blast_hsp
+        HSP(hit_id='gi|301171311|ref|NR_035856.1|', query_id='33211', 1 fragments)
+        >>> print(blast_hsp)
+              Query: 33211 mir_1
+                Hit: gi|301171311|ref|NR_035856.1| Pan troglodytes microRNA mir-520b ...
+        Query range: [1:61] (1)
+          Hit range: [0:60] (1)
+        Quick stats: evalue 1.7e-22; bitscore 109.49
+          Fragments: 1 (60 columns)
+             Query - CCTCTACAGGGAAGCGCTTTCTGTTGTCTGAAAGAAAAGAAAGTGCTTCCTTTTAGAGGG
+                     ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+               Hit - CCTCTACAGGGAAGCGCTTTCTGTTGTCTGAAAGAAAAGAAAGTGCTTCCTTTTAGAGGG
+
+    For HSPs with a single HSPFragment, you can invoke ``print`` on it and see the
+    underlying sequence alignment, if it exists. This is not the case for HSPs
+    with more than one HSPFragment. Below is an example, using an HSP from a
+    BLAT search. Invoking ``print`` on these HSPs will instead show a table of the
+    HSPFragment objects it contains::
+
+        >>> blat_qresult = SearchIO.read('Blat/mirna.pslx', 'blat-psl', pslx=True)
+        >>> blat_hsp = blat_qresult[1][0]       # the first HSP from the second hit
+        >>> blat_hsp
+        HSP(hit_id='chr11', query_id='blat_1', 2 fragments)
+        >>> print(blat_hsp)
+              Query: blat_1 
+                Hit: chr11 
+        Query range: [42:67] (-1)
+          Hit range: [59018929:59018955] (1)
+        Quick stats: evalue ?; bitscore ?
+          Fragments: ---  --------------  ----------------------  ----------------------
+                       #            Span             Query range               Hit range
+                     ---  --------------  ----------------------  ----------------------
+                       0               6                 [61:67]     [59018929:59018935]
+                       1              16                 [42:58]     [59018939:59018955]
+
+    Notice that in HSPs with more than one HSPFragment, the HSP's ``query_range``
+    and ``hit_range`` properties encompass all fragments they contain.
+
+    You can check whether an HSP has more than one HSPFragment or not using the
+    ``is_fragmented`` property::
+
+        >>> blast_hsp.is_fragmented
+        False
+        >>> blat_hsp.is_fragmented
+        True
+
+    Since HSP objects are also containers similar to Python lists, you can
+    access a single fragment in an HSP using its integer index::
+
+        >>> blat_fragment = blat_hsp[0]
+        >>> print(blat_fragment)
+              Query: blat_1 
+                Hit: chr11 
+        Query range: [61:67] (-1)
+          Hit range: [59018929:59018935] (1)
+          Fragments: 1 (6 columns)
+             Query - tatagt
+               Hit - tatagt
+
+    This applies to HSP objects with a single fragment as well::
+
+        >>> blast_fragment = blast_hsp[0]
+        >>> print(blast_fragment)
+              Query: 33211 mir_1
+                Hit: gi|301171311|ref|NR_035856.1| Pan troglodytes microRNA mir-520b ...
+        Query range: [1:61] (1)
+          Hit range: [0:60] (1)
+          Fragments: 1 (60 columns)
+             Query - CCTCTACAGGGAAGCGCTTTCTGTTGTCTGAAAGAAAAGAAAGTGCTTCCTTTTAGAGGG
+                     ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+               Hit - CCTCTACAGGGAAGCGCTTTCTGTTGTCTGAAAGAAAAGAAAGTGCTTCCTTTTAGAGGG
+
+    Regardless of the search output file format, HSP objects provide the
+    properties listed below. These properties always return values in a list,
+    due to the HSP object itself being a list-like container. However, for
+    HSP objects with a single HSPFragment, shortcut properties that fetch
+    the item from the list are also provided.
+
+    +----------------------+---------------------+-----------------------------+
+    | Property             | Shortcut            | Value                       |
+    +======================+=====================+=============================+
+    | aln_all              | aln                 | HSP alignments as           |
+    |                      |                     | MultipleSeqAlignment object |
+    +----------------------+---------------------+-----------------------------+
+    | aln_annotation_all   | aln_annotation      | dictionary of annotation(s) |
+    |                      |                     | of all fragments' alignments|
+    +----------------------+---------------------+-----------------------------+
+    | fragments            | fragment            | HSPFragment objects         |
+    +----------------------+---------------------+-----------------------------+
+    | hit_all              | hit                 | hit sequence as SeqRecord   |
+    |                      |                     | objects                     |
+    +----------------------+---------------------+-----------------------------+
+    | hit_features_all     | hit_features        | SeqFeatures of all hit      |
+    |                      |                     | fragments                   |
+    +----------------------+---------------------+-----------------------------+
+    | hit_start_all        | hit_start*          | start coordinates of the    |
+    |                      |                     | hit fragments               |
+    +----------------------+---------------------+-----------------------------+
+    | hit_end_all          | hit_end*            | end coordinates of the hit  |
+    |                      |                     | fragments                   |
+    +----------------------+---------------------+-----------------------------+
+    | hit_span_all         | hit_span*           | sizes of each hit fragments |
+    +----------------------+---------------------+-----------------------------+
+    | hit_strand_all       | hit_strand          | strand orientations of the  |
+    |                      |                     | hit fragments               |
+    +----------------------+---------------------+-----------------------------+
+    | hit_frame_all        | hit_frame           | reading frames of the hit   |
+    |                      |                     | fragments                   |
+    +----------------------+---------------------+-----------------------------+
+    | hit_range_all        | hit_range           | tuples of start and end     |
+    |                      |                     | coordinates of each hit     |
+    |                      |                     | fragment                    |
+    +----------------------+---------------------+-----------------------------+
+    | query_all            | query               | query sequence as SeqRecord |
+    |                      |                     | object                      |
+    +----------------------+---------------------+-----------------------------+
+    | query_features_all   | query_features      | SeqFeatures of all query    |
+    |                      |                     | fragments                   |
+    +----------------------+---------------------+-----------------------------+
+    | query_start_all      | query_start*        | start coordinates of the    |
+    |                      |                     | query fragments             |
+    +----------------------+---------------------+-----------------------------+
+    | query_end_all        | query_end*          | end coordinates of the      |
+    |                      |                     | query fragments             |
+    +----------------------+---------------------+-----------------------------+
+    | query_span_all       | query_span*         | sizes of each query         |
+    |                      |                     | fragments                   |
+    +----------------------+---------------------+-----------------------------+
+    | query_strand_all     | query_strand        | strand orientations of the  |
+    |                      |                     | query fragments             |
+    +----------------------+---------------------+-----------------------------+
+    | query_frame_all      | query_frame         | reading frames of the query |
+    |                      |                     | fragments                   |
+    +----------------------+---------------------+-----------------------------+
+    | query_range_all      | query_range         | tuples of start and end     |
+    |                      |                     | coordinates of each query   |
+    |                      |                     | fragment                    |
+    +----------------------+---------------------+-----------------------------+
+
+    For all types of HSP objects, the property will return the values in a list.
+    Shortcuts are only applicable for HSPs with one fragment. Except for the
+    ones noted, if they are used on an HSP with more than one fragment, an exception
+    will be raised.
+
+    For properties that may be used in HSPs with multiple or single fragments
+    (``*_start``, ``*_end``, and ``*_span`` properties), their interpretation depends
+    on how many fragments the HSP has:
+
+    +------------+---------------------------------------------------+
+    | Property   | Value                                             |
+    +============+===================================================+
+    | hit_start  | smallest coordinate value of all hit fragments    |
+    +------------+---------------------------------------------------+
+    | hit_end    | largest coordinate value of all hit fragments     |
+    +------------+---------------------------------------------------+
+    | hit_span   | difference between ``hit_start`` and ``hit_end``  |
+    +------------+---------------------------------------------------+
+    | query_start| smallest coordinate value of all query fragments  |
+    +------------+---------------------------------------------------+
+    | query_end  | largest coordinate value of all query fragments   |
+    +------------+---------------------------------------------------+
+    | query_span | difference between ``query_start`` and            |
+    |            | ``query_end``                                     |
+    +------------+---------------------------------------------------+
+
+    In addition to the objects listed above, HSP objects also provide the
+    following properties and/or attributes:
+
+    +--------------------+------------------------------------------------------+
+    | Property           | Value                                                |
+    +====================+======================================================+
+    | aln_span           | total number of residues in all HSPFragment objects  |
+    +--------------------+------------------------------------------------------+
+    | molecule_type      | molecule_type of the hit and query SeqRecord objects |
+    +--------------------+------------------------------------------------------+
+    | is_fragmented      | boolean, whether there are multiple fragments or not |
+    +--------------------+------------------------------------------------------+
+    | hit_id             | ID of the hit sequence                               |
+    +--------------------+------------------------------------------------------+
+    | hit_description    | description of the hit sequence                      |
+    +--------------------+------------------------------------------------------+
+    | hit_inter_ranges   | list of hit sequence coordinates of the regions      |
+    |                    | between fragments                                    |
+    +--------------------+------------------------------------------------------+
+    | hit_inter_spans    | list of lengths of the regions between hit fragments |
+    +--------------------+------------------------------------------------------+
+    | output_index       | 0-based index for storing the order by which the HSP |
+    |                    | appears in the output file (default: -1).            |
+    +--------------------+------------------------------------------------------+
+    | query_id           | ID of the query sequence                             |
+    +--------------------+------------------------------------------------------+
+    | query_description  | description of the query sequence                    |
+    +--------------------+------------------------------------------------------+
+    | query_inter_ranges | list of query sequence coordinates of the regions    |
+    |                    | between fragments                                    |
+    +--------------------+------------------------------------------------------+
+    | query_inter_spans  | list of lengths of the regions between query         |
+    |                    | fragments                                            |
+    +--------------------+------------------------------------------------------+
+
+    Properties marked with an asterisk (*) may also be used in HSPs with
+    multiple fragments.
+
+    """
+
+    # attributes we don't want to transfer when creating a new HSP object
+    # from this one
+    _NON_STICKY_ATTRS = ("_items",)
+
+    def __init__(self, fragments=(), output_index=-1):
+        """Initialize an HSP object.
+
+        :param fragments: fragments contained in the HSP object
+        :type fragments: iterable yielding HSPFragment
+        :param output_index: optional index / ordering of the HSP fragment in
+            the original input file.
+        :type output_index: integer
+
+        HSP objects must be initialized with a list containing at least one
+        HSPFragment object. If multiple HSPFragment objects are used for
+        initialization, they must all have the same ``query_id``,
+        ``query_description``, ``hit_id``, ``hit_description``, and
+        ``molecule_type`` properties.
+
+        """
+        if not fragments:
+            raise ValueError("HSP objects must have at least one HSPFragment object.")
+        # TODO - Move this into the for loop in case fragments is a single use
+        # iterable?
+        # check that all fragments contain the same IDs, descriptions,
+        # molecule_type
+        for attr in (
+            "query_id",
+            "query_description",
+            "hit_id",
+            "hit_description",
+            "molecule_type",
+        ):
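+            # collapsing each attribute's values into a set detects any
+            # disagreement between fragments in a single pass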
+            if len({getattr(frag, attr) for frag in fragments}) != 1:
+                raise ValueError(
+                    "HSP object can not contain fragments with more than one %s." % attr
+                )
+
+        self.output_index = output_index
+        self._items = []
+        for fragment in fragments:
+            self._validate_fragment(fragment)
+            self._items.append(fragment)
+
+    def __repr__(self):
+        """Return string representation of HSP object."""
+        return "%s(hit_id=%r, query_id=%r, %r fragments)" % (
+            self.__class__.__name__,
+            self.hit_id,
+            self.query_id,
+            len(self),
+        )
+
+    def __iter__(self):
+        """Iterate over HSP items."""
+        return iter(self._items)
+
+    def __contains__(self, fragment):
+        """Return True if HSPFragment is on HSP items."""
+        return fragment in self._items
+
+    def __len__(self):
+        """Return number of HSPs items."""
+        return len(self._items)
+
+    def __bool__(self):
+        """Return True if it has HSPs."""
+        return bool(self._items)
+
+    def __str__(self):
+        """Return a human readable summary of the HSP object."""
+        lines = []
+        # set hsp info line
+        statline = []
+        # evalue
+        evalue = getattr_str(self, "evalue", fmt="%.2g")
+        statline.append("evalue " + evalue)
+        # bitscore
+        bitscore = getattr_str(self, "bitscore", fmt="%.2f")
+        statline.append("bitscore " + bitscore)
+        lines.append("Quick stats: " + "; ".join(statline))
+
+        if len(self.fragments) == 1:
+            return "\n".join(
+                [self._str_hsp_header(), "\n".join(lines), self.fragments[0]._str_aln()]
+            )
+        else:
+            lines.append(
+                "  Fragments: %s  %s  %s  %s" % ("-" * 3, "-" * 14, "-" * 22, "-" * 22)
+            )
+            pattern = "%16s  %14s  %22s  %22s"
+            lines.append(pattern % ("#", "Span", "Query range", "Hit range"))
+            lines.append(pattern % ("-" * 3, "-" * 14, "-" * 22, "-" * 22))
+            for idx, block in enumerate(self.fragments):
+                # set hsp line and table
+                # alignment span
+                aln_span = getattr_str(block, "aln_span")
+                # query region
+                query_start = getattr_str(block, "query_start")
+                query_end = getattr_str(block, "query_end")
+                query_range = "[%s:%s]" % (query_start, query_end)
+                # max column length is 22
+                query_range = (
+                    query_range[:20] + "~]" if len(query_range) > 22 else query_range
+                )
+                # hit region
+                hit_start = getattr_str(block, "hit_start")
+                hit_end = getattr_str(block, "hit_end")
+                hit_range = "[%s:%s]" % (hit_start, hit_end)
+                hit_range = hit_range[:20] + "~]" if len(hit_range) > 22 else hit_range
+                # append the hsp row
+                lines.append(pattern % (str(idx), aln_span, query_range, hit_range))
+
+            return self._str_hsp_header() + "\n" + "\n".join(lines)
+
+    def __getitem__(self, idx):
+        """Return object of index idx."""
+        # if key is slice, return a new HSP instance
+        if isinstance(idx, slice):
+            obj = self.__class__(self._items[idx])
+            self._transfer_attrs(obj)
+            return obj
+        return self._items[idx]
+
+    def __setitem__(self, idx, fragments):
+        """Set an item of index idx with the given fragments."""
+        # handle case if fragments is a list of HSPFragment objects
+        if isinstance(fragments, (list, tuple)):
+            for fragment in fragments:
+                self._validate_fragment(fragment)
+        else:
+            self._validate_fragment(fragments)
+
+        self._items[idx] = fragments
+
+    def __delitem__(self, idx):
+        """Delete item of index idx."""
+        # note that this may result in an empty HSP object, which should be
+        # invalid
+        del self._items[idx]
+
+    def _validate_fragment(self, fragment):
+        if not isinstance(fragment, HSPFragment):
+            raise TypeError("HSP objects can only contain HSPFragment objects.")
+        # HACK: to make validation during __init__ work
+        if self._items:
+            if fragment.hit_id != self.hit_id:
+                raise ValueError(
+                    "Expected HSPFragment with hit ID %r, found %r instead."
+                    % (self.hit_id, fragment.hit_id)
+                )
+
+            if fragment.hit_description != self.hit_description:
+                raise ValueError(
+                    "Expected HSPFragment with hit description %r, found %r instead."
+                    % (self.hit_description, fragment.hit_description)
+                )
+
+            if fragment.query_id != self.query_id:
+                raise ValueError(
+                    "Expected HSPFragment with query ID %r, found %r instead."
+                    % (self.query_id, fragment.query_id)
+                )
+
+            if fragment.query_description != self.query_description:
+                raise ValueError(
+                    "Expected HSP with query description %r, found %r instead."
+                    % (self.query_description, fragment.query_description)
+                )
+
+    def _aln_span_get(self):
+        # length of all alignments
+        # alignment span can be its own attribute, or computed from
+        # query / hit length
+        return sum(frg.aln_span for frg in self.fragments)
+
+    aln_span = property(
+        fget=_aln_span_get, doc="Total number of columns in all HSPFragment objects."
+    )
+
+    # coordinate properties #
+    def _get_coords(self, seq_type, coord_type):
+        assert seq_type in ("hit", "query")
+        assert coord_type in ("start", "end")
+        coord_name = "%s_%s" % (seq_type, coord_type)
+        coords = [getattr(frag, coord_name) for frag in self.fragments]
+        if None in coords:
+            warnings.warn(
+                "'None' exist in %s coordinates; ignored" % (coord_name),
+                BiopythonWarning,
+            )
+        return coords
+
+    def _hit_start_get(self):
+        return min(self._get_coords("hit", "start"))
+
+    hit_start = property(
+        fget=_hit_start_get, doc="Smallest coordinate value of all hit fragments."
+    )
+
+    def _query_start_get(self):
+        return min(self._get_coords("query", "start"))
+
+    query_start = property(
+        fget=_query_start_get, doc="Smallest coordinate value of all query fragments."
+    )
+
+    def _hit_end_get(self):
+        return max(self._get_coords("hit", "end"))
+
+    hit_end = property(
+        fget=_hit_end_get, doc="Largest coordinate value of all hit fragments."
+    )
+
+    def _query_end_get(self):
+        return max(self._get_coords("query", "end"))
+
+    query_end = property(
+        fget=_query_end_get, doc="Largest coordinate value of all query fragments."
+    )
+
+    # coordinate-dependent properties #
+    def _hit_span_get(self):
+        try:
+            return self.hit_end - self.hit_start
+        except TypeError:  # triggered if any of the coordinates are None
+            return None
+
+    hit_span = property(
+        fget=_hit_span_get, doc="The number of hit residues covered by the HSP."
+    )
+
+    def _query_span_get(self):
+        try:
+            return self.query_end - self.query_start
+        except TypeError:  # triggered if any of the coordinates are None
+            return None
+
+    query_span = property(
+        fget=_query_span_get, doc="The number of query residues covered by the HSP."
+    )
+
+    def _hit_range_get(self):
+        return (self.hit_start, self.hit_end)
+
+    hit_range = property(
+        fget=_hit_range_get, doc="Tuple of HSP hit start and end coordinates."
+    )
+
+    def _query_range_get(self):
+        return (self.query_start, self.query_end)
+
+    query_range = property(
+        fget=_query_range_get, doc="Tuple of HSP query start and end coordinates."
+    )
+
+    def _inter_ranges_get(self, seq_type):
+        # this property assumes that there are no mixed strands in a hit/query
+        assert seq_type in ("query", "hit")
+        strand = getattr(self, "%s_strand_all" % seq_type)[0]
+        coords = getattr(self, "%s_range_all" % seq_type)
+        # determine function used to set inter range
+        # start and end coordinates, given two pairs
+        # of fragment start and end coordinates
+        if strand == -1:
+            startfunc, endfunc = min, max
+        else:
+            startfunc, endfunc = max, min
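+        # on the plus strand, fragments ascend: each gap runs from the end of
+        # one fragment (the max of its coordinate pair) to the start of the
+        # next (the min of the following pair); the minus strand mirrors this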
+        inter_coords = []
+        for idx, coord in enumerate(coords[:-1]):
+            start = startfunc(coords[idx])
+            end = endfunc(coords[idx + 1])
+            inter_coords.append((min(start, end), max(start, end)))
+
+        return inter_coords
+
+    def _hit_inter_ranges_get(self):
+        return self._inter_ranges_get("hit")
+
+    hit_inter_ranges = property(
+        fget=_hit_inter_ranges_get,
+        doc="Hit sequence coordinates of the regions between fragments.",
+    )
+
+    def _query_inter_ranges_get(self):
+        return self._inter_ranges_get("query")
+
+    query_inter_ranges = property(
+        fget=_query_inter_ranges_get,
+        doc="Query sequence coordinates of the regions between fragments.",
+    )
+
+    def _inter_spans_get(self, seq_type):
+        assert seq_type in ("query", "hit")
+        attr_name = "%s_inter_ranges" % seq_type
+        return [coord[1] - coord[0] for coord in getattr(self, attr_name)]
+
+    def _hit_inter_spans_get(self):
+        return self._inter_spans_get("hit")
+
+    hit_inter_spans = property(
+        fget=_hit_inter_spans_get, doc="Lengths of regions between hit fragments."
+    )
+
+    def _query_inter_spans_get(self):
+        return self._inter_spans_get("query")
+
+    query_inter_spans = property(
+        fget=_query_inter_spans_get, doc="Lengths of regions between query fragments."
+    )
+
+    # shortcuts for fragments' properties #
+
+    # bool check if there's more than one fragment
+    is_fragmented = property(
+        lambda self: len(self) > 1,
+        doc="Whether the HSP has more than one HSPFragment object.",
+    )
+
+    # first item properties with setters
+    hit_description = fullcascade(
+        "hit_description", doc="Description of the hit sequence."
+    )
+
+    query_description = fullcascade(
+        "query_description", doc="Description of the query sequence."
+    )
+
+    hit_id = fullcascade("hit_id", doc="ID of the hit sequence.")
+
+    query_id = fullcascade("query_id", doc="ID of the query sequence.")
+
+    molecule_type = fullcascade(
+        "molecule_type", doc="molecule_type of the hit and query SeqRecord objects."
+    )
+
+    # properties for single-fragment HSPs
+    fragment = singleitem(doc="HSPFragment object, first fragment.")
+
+    hit = singleitem("hit", doc="Hit sequence as a SeqRecord object, first fragment.")
+
+    query = singleitem(
+        "query", doc="Query sequence as a SeqRecord object, first fragment."
+    )
+
+    aln = singleitem(
+        "aln", doc="Alignment of the first fragment as a MultipleSeqAlignment object."
+    )
+
+    aln_annotation = singleitem(
+        "aln_annotation",
+        doc="Dictionary of annotation(s) of the first fragment's alignment.",
+    )
+
+    hit_features = singleitem(
+        "hit_features", doc="Hit sequence features, first fragment."
+    )
+
+    query_features = singleitem(
+        "query_features", doc="Query sequence features, first fragment."
+    )
+
+    hit_strand = singleitem("hit_strand", doc="Hit strand orientation, first fragment.")
+
+    query_strand = singleitem(
+        "query_strand", doc="Query strand orientation, first fragment."
+    )
+
+    hit_frame = singleitem(
+        "hit_frame", doc="Hit sequence reading frame, first fragment."
+    )
+
+    query_frame = singleitem(
+        "query_frame", doc="Query sequence reading frame, first fragment."
+    )
+
+    # properties for multi-fragment HSPs
+    fragments = allitems(doc="List of all HSPFragment objects.")
+
+    hit_all = allitems(
+        "hit", doc="List of all fragments' hit sequences as SeqRecord objects."
+    )
+
+    query_all = allitems(
+        "query", doc="List of all fragments' query sequences as SeqRecord objects."
+    )
+
+    aln_all = allitems(
+        "aln", doc="List of all fragments' alignments as MultipleSeqAlignment objects."
+    )
+
+    aln_annotation_all = allitems(
+        "aln_annotation",
+        doc="List of annotation dictionaries of all fragments' alignments.",
+    )
+
+    hit_features_all = allitems(
+        "hit_features", doc="List of all hit sequence features."
+    )
+
+    query_features_all = allitems(
+        "query_features", doc="List of all query sequence features."
+    )
+
+    hit_strand_all = allitems(
+        "hit_strand", doc="List of all fragments' hit sequence strands."
+    )
+
+    query_strand_all = allitems(
+        "query_strand", doc="List of all fragments' query sequence strands."
+    )
+
+    hit_frame_all = allitems(
+        "hit_frame", doc="List of all fragments' hit sequence reading frames."
+    )
+
+    query_frame_all = allitems(
+        "query_frame", doc="List of all fragments' query sequence reading frames."
+    )
+
+    hit_start_all = allitems(
+        "hit_start", doc="List of all fragments' hit start coordinates."
+    )
+
+    query_start_all = allitems(
+        "query_start", doc="List of all fragments' query start coordinates."
+    )
+
+    hit_end_all = allitems("hit_end", doc="List of all fragments' hit end coordinates.")
+
+    query_end_all = allitems(
+        "query_end", doc="List of all fragments' query end coordinates."
+    )
+
+    hit_span_all = allitems(
+        "hit_span", doc="List of all fragments' hit sequence sizes."
+    )
+
+    query_span_all = allitems(
+        "query_span", doc="List of all fragments' query sequence sizes."
+    )
+
+    hit_range_all = allitems(
+        "hit_range", doc="List of all fragments' hit start and end coordinates."
+    )
+
+    query_range_all = allitems(
+        "query_range", doc="List of all fragments' query start and end coordinates."
+    )
+
+
+class HSPFragment(_BaseHSP):
+    """Class representing a contiguous alignment of hit-query sequence.
+
+    HSPFragment forms the core of any parsed search output file. Depending on
+    the search output file format, it may contain the actual query and/or hit
+    sequences that produce the search hits. These sequences are stored as
+    SeqRecord objects (see SeqRecord):
+
+    >>> from Bio import SearchIO
+    >>> qresult = next(SearchIO.parse('Blast/mirna.xml', 'blast-xml'))
+    >>> fragment = qresult[0][0][0]   # first hit, first hsp, first fragment
+    >>> print(fragment)
+          Query: 33211 mir_1
+            Hit: gi|262205317|ref|NR_030195.1| Homo sapiens microRNA 520b (MIR520...
+    Query range: [0:61] (1)
+      Hit range: [0:61] (1)
+      Fragments: 1 (61 columns)
+         Query - CCCTCTACAGGGAAGCGCTTTCTGTTGTCTGAAAGAAAAGAAAGTGCTTCCTTTTAGAGGG
+                 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+           Hit - CCCTCTACAGGGAAGCGCTTTCTGTTGTCTGAAAGAAAAGAAAGTGCTTCCTTTTAGAGGG
+
+    # the query sequence is a SeqRecord object
+    >>> fragment.query.__class__
+    <class 'Bio.SeqRecord.SeqRecord'>
+    >>> print(fragment.query)
+    ID: 33211
+    Name: aligned query sequence
+    Description: mir_1
+    Number of features: 0
+    /molecule_type=DNA
+    Seq('CCCTCTACAGGGAAGCGCTTTCTGTTGTCTGAAAGAAAAGAAAGTGCTTCCTTT...GGG')
+
+    # the hit sequence is a SeqRecord object as well
+    >>> fragment.hit.__class__
+    <class 'Bio.SeqRecord.SeqRecord'>
+    >>> print(fragment.hit)
+    ID: gi|262205317|ref|NR_030195.1|
+    Name: aligned hit sequence
+    Description: Homo sapiens microRNA 520b (MIR520B), microRNA
+    Number of features: 0
+    /molecule_type=DNA
+    Seq('CCCTCTACAGGGAAGCGCTTTCTGTTGTCTGAAAGAAAAGAAAGTGCTTCCTTT...GGG')
+
+    # when both query and hit are present, we get a MultipleSeqAlignment object
+    >>> fragment.aln.__class__
+    <class 'Bio.Align.MultipleSeqAlignment'>
+    >>> print(fragment.aln)
+    Alignment with 2 rows and 61 columns
+    CCCTCTACAGGGAAGCGCTTTCTGTTGTCTGAAAGAAAAGAAAG...GGG 33211
+    CCCTCTACAGGGAAGCGCTTTCTGTTGTCTGAAAGAAAAGAAAG...GGG gi|262205317|ref|NR_030195.1|
+
+    """
+
+    def __init__(
+        self,
+        hit_id="",
+        query_id="",
+        hit=None,
+        query=None,
+        molecule_type=None,
+    ):
+        """Initialize the class."""
+        self._molecule_type = molecule_type
+        self.aln_annotation = {}
+
+        self._hit_id = hit_id
+        self._query_id = query_id
+
+        for seq_type, seq in (("query", query), ("hit", hit)):
+            # default attributes of the query or hit
+            setattr(self, "_%s_description" % seq_type, "")
+            setattr(self, "_%s_features" % seq_type, [])
+            # query or hit attributes whose default value is None
+            for attr in ("strand", "frame", "start", "end"):
+                setattr(self, "%s_%s" % (seq_type, attr), None)
+            # assign self.query or self.hit through the property setter,
+            # without resorting to eval()
+            setattr(self, seq_type, seq if seq else None)
+
+    def __repr__(self):
+        """Return HSPFragment info; hit id, query id, number of columns."""
+        info = "hit_id=%r, query_id=%r" % (self.hit_id, self.query_id)
+        try:
+            info += ", %i columns" % len(self)
+        except AttributeError:
+            pass
+        return "%s(%s)" % (self.__class__.__name__, info)
+
+    def __len__(self):
+        """Return alignment span."""
+        return self.aln_span
+
+    def __str__(self):
+        """Return string of HSP header and alignments."""
+        return self._str_hsp_header() + "\n" + self._str_aln()
+
+    def __getitem__(self, idx):
+        """Return object of index idx."""
+        if self.aln is not None:
+            obj = self.__class__(
+                hit_id=self.hit_id,
+                query_id=self.query_id,
+                molecule_type=self.molecule_type,
+            )
+            # transfer query and hit attributes
+            # let SeqRecord handle feature slicing, then retrieve the sliced
+            # features into the sliced HSPFragment
+            if self.query is not None:
+                obj.query = self.query[idx]
+                obj.query_features = obj.query.features
+            if self.hit is not None:
+                obj.hit = self.hit[idx]
+                obj.hit_features = obj.hit.features
+            # description, strand, frame
+            for attr in ("description", "strand", "frame"):
+                for seq_type in ("hit", "query"):
+                    attr_name = "%s_%s" % (seq_type, attr)
+                    self_val = getattr(self, attr_name)
+                    setattr(obj, attr_name, self_val)
+            # alignment annotation should be transferred, since we can compute
+            # the resulting annotation
+            obj.aln_annotation = {}
+            for key, value in self.aln_annotation.items():
+                assert len(value[idx]) == len(obj)
+                obj.aln_annotation[key] = value[idx]
+            return obj
+        else:
+            raise TypeError(
+                "Slicing for HSP objects without alignment is not supported."
+            )
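+
+    # Minimal slicing sketch (assumed sequences, not from the original file):
+    #
+    #     frag = HSPFragment(hit="ATGGCC", query="ATG-CC")
+    #     frag.aln_annotation["similarity"] = "|||  |"
+    #     sub = frag[:3]              # first three alignment columns
+    #     str(sub.query.seq)          # 'ATG'
+    #     sub.aln_annotation          # {'similarity': '|||'}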
+
+    def _str_aln(self):
+        lines = []
+        # alignment length
+        aln_span = getattr_str(self, "aln_span")
+        lines.append("  Fragments: 1 (%s columns)" % aln_span)
+        # sequences
+        if self.query is not None and self.hit is not None:
+            try:
+                qseq = str(self.query.seq)
+            except AttributeError:  # fall back if query has no .seq
+                qseq = "?"
+            try:
+                hseq = str(self.hit.seq)
+            except AttributeError:  # fall back if hit has no .seq
+                hseq = "?"
+
+            # similarity line
+            simil = ""
+            if "similarity" in self.aln_annotation and isinstance(
+                self.aln_annotation.get("similarity"), str
+            ):
+                simil = self.aln_annotation["similarity"]
+
+            if self.aln_span <= 67:
+                lines.append("%10s - %s" % ("Query", qseq))
+                if simil:
+                    lines.append("             %s" % simil)
+                lines.append("%10s - %s" % ("Hit", hseq))
+            else:
+                # adjust continuation character length, so we don't display
+                # the same residues twice
+                if self.aln_span - 66 > 3:
+                    cont = "~" * 3
+                else:
+                    cont = "~" * (self.aln_span - 66)
+                lines.append("%10s - %s%s%s" % ("Query", qseq[:59], cont, qseq[-5:]))
+                if simil:
+                    lines.append("             %s%s%s" % (simil[:59], cont, simil[-5:]))
+                lines.append("%10s - %s%s%s" % ("Hit", hseq[:59], cont, hseq[-5:]))
+
+        return "\n".join(lines)
+
+    # sequence properties #
+    def _set_seq(self, seq, seq_type):
+        """Check the given sequence for attribute setting (PRIVATE).
+
+        :param seq: sequence to check
+        :type seq: string or SeqRecord
+        :param seq_type: sequence type
+        :type seq_type: string, choice of 'hit' or 'query'
+
+        """
+        assert seq_type in ("hit", "query")
+        if seq is None:
+            return seq  # return immediately if seq is None
+        else:
+            if not isinstance(seq, (str, SeqRecord)):
+                raise TypeError(
+                    "%s sequence must be a string or a SeqRecord object." % seq_type
+                )
+        # check length if the opposite sequence is not None
+        opp_type = "hit" if seq_type == "query" else "query"
+        opp_seq = getattr(self, "_%s" % opp_type, None)
+        if opp_seq is not None:
+            if len(seq) != len(opp_seq):
+                raise ValueError(
+                    "Sequence lengths do not match. Expected: %r (%s); found: %r (%s)."
+                    % (len(opp_seq), opp_type, len(seq), seq_type)
+                )
+
+        seq_id = getattr(self, "%s_id" % seq_type)
+        seq_desc = getattr(self, "%s_description" % seq_type)
+        seq_feats = getattr(self, "%s_features" % seq_type)
+        seq_name = "aligned %s sequence" % seq_type
+
+        if isinstance(seq, SeqRecord):
+            seq.id = seq_id
+            seq.description = seq_desc
+            seq.name = seq_name
+            seq.features = seq_feats
+            seq.annotations["molecule_type"] = self.molecule_type
+        elif isinstance(seq, str):
+            seq = SeqRecord(
+                Seq(seq),
+                id=seq_id,
+                name=seq_name,
+                description=seq_desc,
+                features=seq_feats,
+                annotations={"molecule_type": self.molecule_type},
+            )
+
+        return seq
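+
+    # Behavior sketch for _set_seq (assumed values): a plain string is wrapped
+    # in a SeqRecord carrying the fragment's ID and description, and a length
+    # mismatch with the opposite sequence is rejected:
+    #
+    #     frag = HSPFragment(hit_id="hit1", query_id="q1")
+    #     frag.query = "ATGC"   # stored as SeqRecord(Seq('ATGC'), id='q1')
+    #     frag.hit = "ATG"      # raises ValueError: lengths 3 != 4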
+
+    def _hit_get(self):
+        return self._hit
+
+    def _hit_set(self, value):
+        self._hit = self._set_seq(value, "hit")
+
+    hit = property(
+        fget=_hit_get,
+        fset=_hit_set,
+        doc="Hit sequence as a SeqRecord object, defaults to None.",
+    )
+
+    def _query_get(self):
+        return self._query
+
+    def _query_set(self, value):
+        self._query = self._set_seq(value, "query")
+
+    query = property(
+        fget=_query_get,
+        fset=_query_set,
+        doc="Query sequence as a SeqRecord object, defaults to None.",
+    )
+
+    def _aln_get(self):
+        if self.query is None and self.hit is None:
+            return None
+        if self.hit is None:
+            msa = MultipleSeqAlignment([self.query])
+        elif self.query is None:
+            msa = MultipleSeqAlignment([self.hit])
+        else:
+            msa = MultipleSeqAlignment([self.query, self.hit])
+        molecule_type = self.molecule_type
+        if molecule_type is not None:
+            msa.molecule_type = molecule_type
+        return msa
+
+    aln = property(
+        fget=_aln_get,
+        doc="Query-hit alignment as a MultipleSeqAlignment object, defaults to None.",
+    )
+
+    def _molecule_type_get(self):
+        return self._molecule_type
+
+    def _molecule_type_set(self, value):
+        self._molecule_type = value
+        try:
+            self.query.annotations["molecule_type"] = value
+        except AttributeError:
+            pass
+        try:
+            self.hit.annotations["molecule_type"] = value
+        except AttributeError:
+            pass
+
+    molecule_type = property(
+        fget=_molecule_type_get,
+        fset=_molecule_type_set,
+        doc="molecule type used in the fragment's "
+        "sequence records and alignment, defaults to None.",
+    )
+
+    def _aln_span_get(self):
+        # length of alignment (gaps included)
+        # alignment span can be its own attribute, or computed from
+        # query / hit length
+        try:
+            self._aln_span
+        except AttributeError:
+            if self.query is not None:
+                self._aln_span = len(self.query)
+            elif self.hit is not None:
+                self._aln_span = len(self.hit)
+
+        return self._aln_span
+
+    def _aln_span_set(self, value):
+        self._aln_span = value
+
+    aln_span = property(
+        fget=_aln_span_get,
+        fset=_aln_span_set,
+        doc="The number of alignment columns covered by the fragment.",
+    )
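+
+    # For example (illustrative values): a fragment built only from
+    # query="ATG-CC" reports aln_span == 6, the gapped alignment length,
+    # even though the ungapped query has just five residues.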
+
+    # id, description, and features properties #
+    hit_description = fragcascade("description", "hit", doc="Hit sequence description.")
+
+    query_description = fragcascade(
+        "description", "query", doc="Query sequence description."
+    )
+
+    hit_id = fragcascade("id", "hit", doc="Hit sequence ID.")
+
+    query_id = fragcascade("id", "query", doc="Query sequence ID.")
+
+    hit_features = fragcascade("features", "hit", doc="Hit sequence features.")
+
+    query_features = fragcascade("features", "query", doc="Query sequence features.")
+
+    # strand properties #
+    def _prep_strand(self, strand):
+        # follow SeqFeature's convention
+        if strand not in (-1, 0, 1, None):
+            raise ValueError("Strand should be -1, 0, 1, or None; not %r" % strand)
+        return strand
+
+    def _get_strand(self, seq_type):
+        assert seq_type in ("hit", "query")
+        strand = getattr(self, "_%s_strand" % seq_type)
+
+        if strand is None:
+            # try to compute strand from frame
+            frame = getattr(self, "%s_frame" % seq_type)
+            if frame is not None:
+                try:
+                    strand = frame // abs(frame)
+                except ZeroDivisionError:
+                    strand = 0
+                setattr(self, "%s_strand" % seq_type, strand)
+
+        return strand
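+
+    # For example (illustrative values): if query_strand was never set but
+    # query_frame is -2, the property computes -2 // abs(-2) == -1 and caches
+    # it; a frame of 0 maps to strand 0 via the ZeroDivisionError branch.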
+
+    def _hit_strand_get(self):
+        return self._get_strand("hit")
+
+    def _hit_strand_set(self, value):
+        self._hit_strand = self._prep_strand(value)
+
+    hit_strand = property(
+        fget=_hit_strand_get,
+        fset=_hit_strand_set,
+        doc="Hit sequence strand, defaults to None.",
+    )
+
+    def _query_strand_get(self):
+        return self._get_strand("query")
+
+    def _query_strand_set(self, value):
+        self._query_strand = self._prep_strand(value)
+
+    query_strand = property(
+        fget=_query_strand_get,
+        fset=_query_strand_set,
+        doc="Query sequence strand, defaults to None.",
+    )
+
+    # frame properties #
+    def _prep_frame(self, frame):
+        if frame not in (-3, -2, -1, 0, 1, 2, 3, None):
+            raise ValueError(
+                "Frame should be an integer between -3 and 3, or None; not %r" % frame
+            )
+        return frame
+
+    def _hit_frame_get(self):
+        return self._hit_frame
+
+    def _hit_frame_set(self, value):
+        self._hit_frame = self._prep_frame(value)
+
+    hit_frame = property(
+        fget=_hit_frame_get,
+        fset=_hit_frame_set,
+        doc="Hit sequence reading frame, defaults to None.",
+    )
+
+    def _query_frame_get(self):
+        """Get query sequence reading frame (PRIVATE)."""
+        return self._query_frame
+
+    def _query_frame_set(self, value):
+        """Set query sequence reading frame (PRIVATE)."""
+        self._query_frame = self._prep_frame(value)
+
+    query_frame = property(
+        fget=_query_frame_get,
+        fset=_query_frame_set,
+        doc="Query sequence reading frame, defaults to None.",
+    )
+
+    # coordinate properties #
+    def _prep_coord(self, coord, opp_coord_name, op):
+        # coord must either be None or int
+        if coord is None:
+            return coord
+        assert isinstance(coord, int)
+        # try to get opposite coordinate, if it's not present, return
+        try:
+            opp_coord = getattr(self, opp_coord_name)
+        except AttributeError:
+            return coord
+        # if opposite coordinate is None, return
+        if opp_coord is None:
+            return coord
+        # otherwise compare it to coord ('>=' or '<=')
+        else:
+            assert op(coord, opp_coord)
+        return coord
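+
+    # Validation sketch (assumed values): after ``frag.hit_start = 10``,
+    # assigning ``frag.hit_end = 5`` trips the assertion above, because
+    # _prep_coord(5, "hit_start", ge) checks ge(5, 10); once both coordinates
+    # are set, start <= end always holds.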
+
+    def _hit_start_get(self):
+        """Get the sequence hit start coordinate (PRIVATE)."""
+        return self._hit_start
+
+    def _hit_start_set(self, value):
+        """Set the sequence hit start coordinate (PRIVATE)."""
+        self._hit_start = self._prep_coord(value, "hit_end", le)
+
+    hit_start = property(
+        fget=_hit_start_get,
+        fset=_hit_start_set,
+        doc="Hit sequence start coordinate, defaults to None.",
+    )
+
+    def _query_start_get(self):
+        """Get the query sequence start coordinate (PRIVATE)."""
+        return self._query_start
+
+    def _query_start_set(self, value):
+        """Set the query sequence start coordinate (PRIVATE)."""
+        self._query_start = self._prep_coord(value, "query_end", le)
+
+    query_start = property(
+        fget=_query_start_get,
+        fset=_query_start_set,
+        doc="Query sequence start coordinate, defaults to None.",
+    )
+
+    def _hit_end_get(self):
+        """Get the hit sequence end coordinate (PRIVATE)."""
+        return self._hit_end
+
+    def _hit_end_set(self, value):
+        """Set the hit sequence end coordinate (PRIVATE)."""
+        self._hit_end = self._prep_coord(value, "hit_start", ge)
+
+    hit_end = property(
+        fget=_hit_end_get,
+        fset=_hit_end_set,
+        doc="Hit sequence end coordinate, defaults to None.",
+    )
+
+    def _query_end_get(self):
+        """Get the query sequence end coordinate (PRIVATE)."""
+        return self._query_end
+
+    def _query_end_set(self, value):
+        """Set the query sequence end coordinate (PRIVATE)."""
+        self._query_end = self._prep_coord(value, "query_start", ge)
+
+    query_end = property(
+        fget=_query_end_get,
+        fset=_query_end_set,
+        doc="Query sequence end coordinate, defaults to None.",
+    )
+
+    # coordinate-dependent properties #
+    def _hit_span_get(self):
+        """Return the number of residues covered by the hit sequence (PRIVATE)."""
+        try:
+            return self.hit_end - self.hit_start
+        except TypeError:  # triggered if any of the coordinates are None
+            return None
+
+    hit_span = property(
+        fget=_hit_span_get, doc="The number of residues covered by the hit sequence."
+    )
+
+    def _query_span_get(self):
+        """Return the number or residues covered by the query (PRIVATE)."""
+        try:
+            return self.query_end - self.query_start
+        except TypeError:  # triggered if any of the coordinates are None
+            return None
+
+    query_span = property(
+        fget=_query_span_get,
+        doc="The number of residues covered by the query sequence.",
+    )
+
+    def _hit_range_get(self):
+        """Return the start and end of a hit (PRIVATE)."""
+        return (self.hit_start, self.hit_end)
+
+    hit_range = property(
+        fget=_hit_range_get, doc="Tuple of hit start and end coordinates."
+    )
+
+    def _query_range_get(self):
+        """Return the start and end of a query (PRIVATE)."""
+        return (self.query_start, self.query_end)
+
+    query_range = property(
+        fget=_query_range_get, doc="Tuple of query start and end coordinates."
+    )
+
+
+# if not used as a module, run the doctest
+if __name__ == "__main__":
+    from Bio._utils import run_doctest
+
+    run_doctest()
diff --git a/code/lib/Bio/SearchIO/_model/query.py b/code/lib/Bio/SearchIO/_model/query.py
new file mode 100644
index 0000000..f82cc5c
--- /dev/null
+++ b/code/lib/Bio/SearchIO/_model/query.py
@@ -0,0 +1,743 @@
+# Copyright 2012 by Wibowo Arindrarto.  All rights reserved.
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Bio.SearchIO object to model search results from a single query."""
+
+
+from copy import deepcopy
+from itertools import chain
+from collections import OrderedDict
+
+from Bio.SearchIO._utils import optionalcascade
+
+from ._base import _BaseSearchObject
+from .hit import Hit
+
+
+class QueryResult(_BaseSearchObject):
+    """Class representing search results from a single query.
+
+    QueryResult is the container object that stores all search hits from a
+    single search query. It is the top-level object returned by SearchIO's two
+    main functions, ``read`` and ``parse``. Depending on the search results and
+    search output format, a QueryResult object will contain zero or more Hit
+    objects (see Hit).
+
+    You can take a quick look at a QueryResult's contents and attributes by
+    invoking ``print`` on it::
+
+        >>> from Bio import SearchIO
+        >>> qresult = next(SearchIO.parse('Blast/mirna.xml', 'blast-xml'))
+        >>> print(qresult)
+        Program: blastn (2.2.27+)
+          Query: 33211 (61)
+                 mir_1
+         Target: refseq_rna
+           Hits: ----  -----  ----------------------------------------------------------
+                    #  # HSP  ID + description
+                 ----  -----  ----------------------------------------------------------
+                    0      1  gi|262205317|ref|NR_030195.1|  Homo sapiens microRNA 52...
+                    1      1  gi|301171311|ref|NR_035856.1|  Pan troglodytes microRNA...
+                    2      1  gi|270133242|ref|NR_032573.1|  Macaca mulatta microRNA ...
+                    3      2  gi|301171322|ref|NR_035857.1|  Pan troglodytes microRNA...
+                    4      1  gi|301171267|ref|NR_035851.1|  Pan troglodytes microRNA...
+                    5      2  gi|262205330|ref|NR_030198.1|  Homo sapiens microRNA 52...
+                    6      1  gi|262205302|ref|NR_030191.1|  Homo sapiens microRNA 51...
+                    7      1  gi|301171259|ref|NR_035850.1|  Pan troglodytes microRNA...
+                    8      1  gi|262205451|ref|NR_030222.1|  Homo sapiens microRNA 51...
+                    9      2  gi|301171447|ref|NR_035871.1|  Pan troglodytes microRNA...
+                   10      1  gi|301171276|ref|NR_035852.1|  Pan troglodytes microRNA...
+                   11      1  gi|262205290|ref|NR_030188.1|  Homo sapiens microRNA 51...
+        ...
+
+    If you just want to know how many hits a QueryResult has, you can invoke
+    ``len`` on it. Alternatively, you can simply type its name in the interpreter::
+
+        >>> len(qresult)
+        100
+        >>> qresult
+        QueryResult(id='33211', 100 hits)
+
+    QueryResult behaves like a hybrid of Python's built-in list and dictionary.
+    You can retrieve its items (Hit objects) using the integer index of the
+    item, just like regular Python lists::
+
+        >>> first_hit = qresult[0]
+        >>> first_hit
+        Hit(id='gi|262205317|ref|NR_030195.1|', query_id='33211', 1 hsps)
+
+    You can slice QueryResult objects as well. Slicing will return a new
+    QueryResult object containing only the sliced hits::
+
+        >>> sliced_qresult = qresult[:3]    # slice the first three hits
+        >>> len(qresult)
+        100
+        >>> len(sliced_qresult)
+        3
+        >>> print(sliced_qresult)
+        Program: blastn (2.2.27+)
+          Query: 33211 (61)
+                 mir_1
+         Target: refseq_rna
+           Hits: ----  -----  ----------------------------------------------------------
+                    #  # HSP  ID + description
+                 ----  -----  ----------------------------------------------------------
+                    0      1  gi|262205317|ref|NR_030195.1|  Homo sapiens microRNA 52...
+                    1      1  gi|301171311|ref|NR_035856.1|  Pan troglodytes microRNA...
+                    2      1  gi|270133242|ref|NR_032573.1|  Macaca mulatta microRNA ...
+
+    Like Python dictionaries, you can also retrieve hits using the hit's ID.
+    This is useful for retrieving hits that you know should exist in a given
+    search::
+
+        >>> hit = qresult['gi|262205317|ref|NR_030195.1|']
+        >>> hit
+        Hit(id='gi|262205317|ref|NR_030195.1|', query_id='33211', 1 hsps)
+
+    You can also replace a Hit in QueryResult with another Hit using either the
+    integer index or hit key string. Note that the replacing object must be a
+    Hit that has the same ``query_id`` property as the QueryResult object.
+
+    If you're not sure whether a QueryResult contains a particular hit, you can
+    use the hit ID to check for membership first::
+
+        >>> 'gi|262205317|ref|NR_030195.1|' in qresult
+        True
+        >>> 'gi|262380031|ref|NR_023426.1|' in qresult
+        False
+
+    Or, if you just want to know the rank / position of a given hit, you can
+    use the hit ID as an argument for the ``index`` method. Note that the values
+    returned will be zero-based. So zero (0) means the hit is the first in the
+    QueryResult, three (3) means the hit is the fourth item, and so on. If the
+    hit does not exist in the QueryResult, a ``ValueError`` will be raised.
+
+        >>> qresult.index('gi|262205317|ref|NR_030195.1|')
+        0
+        >>> qresult.index('gi|262205330|ref|NR_030198.1|')
+        5
+        >>> qresult.index('gi|262380031|ref|NR_023426.1|')
+        Traceback (most recent call last):
+        ...
+        ValueError: ...
+
+    To ease working with a large number of hits, QueryResult has several
+    ``filter`` and ``map`` methods, analogous to Python's built-in functions with
+    the same names. There are ``filter`` and ``map`` methods available for
+    operations over both Hit objects or HSP objects. As an example, here we are
+    using the ``hit_map`` method to rename all hit IDs within a QueryResult::
+
+        >>> def renamer(hit):
+        ...     hit.id = hit.id.split('|')[3]
+        ...     return hit
+        >>> mapped_qresult = qresult.hit_map(renamer)
+        >>> print(mapped_qresult)
+        Program: blastn (2.2.27+)
+          Query: 33211 (61)
+                 mir_1
+         Target: refseq_rna
+           Hits: ----  -----  ----------------------------------------------------------
+                    #  # HSP  ID + description
+                 ----  -----  ----------------------------------------------------------
+                    0      1  NR_030195.1  Homo sapiens microRNA 520b (MIR520B), micr...
+                    1      1  NR_035856.1  Pan troglodytes microRNA mir-520b (MIR520B...
+                    2      1  NR_032573.1  Macaca mulatta microRNA mir-519a (MIR519A)...
+        ...
+
+    The principle for the other ``map`` and ``filter`` methods is similar: they
+    accept a function, apply it, and return a new QueryResult object.
+
+    There are also other methods useful for working with list-like objects:
+    ``append``, ``pop``, and ``sort``. More details and examples are available
+    in their respective documentation.
+
+    Finally, just like Python lists and dictionaries, QueryResult objects are
+    iterable. Iteration over QueryResults will yield Hit objects::
+
+        >>> for hit in qresult[:4]:     # iterate over the first four items
+        ...     hit
+        ...
+        Hit(id='gi|262205317|ref|NR_030195.1|', query_id='33211', 1 hsps)
+        Hit(id='gi|301171311|ref|NR_035856.1|', query_id='33211', 1 hsps)
+        Hit(id='gi|270133242|ref|NR_032573.1|', query_id='33211', 1 hsps)
+        Hit(id='gi|301171322|ref|NR_035857.1|', query_id='33211', 2 hsps)
+
+    If you need access to all the hits in a QueryResult object, you can get
+    them in a list using the ``hits`` property. Similarly, access to all hit IDs is
+    available through the ``hit_keys`` property.
+
+        >>> qresult.hits
+        [Hit(id='gi|262205317|ref|NR_030195.1|', query_id='33211', 1 hsps), ...]
+        >>> qresult.hit_keys
+        ['gi|262205317|ref|NR_030195.1|', 'gi|301171311|ref|NR_035856.1|', ...]
+
+    """
+
+    # attributes we don't want to transfer when creating a new QueryResult
+    # object from this one; note the name-mangled form of __alt_hit_ids,
+    # which is how the attribute appears in the instance __dict__
+    _NON_STICKY_ATTRS = ("_items", "_QueryResult__alt_hit_ids")
+
+    def __init__(self, hits=(), id=None, hit_key_function=None):
+        """Initialize a QueryResult object.
+
+        :param id: query sequence ID
+        :type id: string
+        :param hits: iterator yielding Hit objects
+        :type hits: iterable
+        :param hit_key_function: function to define hit keys
+        :type hit_key_function: callable, accepts Hit objects, returns string
+
+        """
+        # default values
+        self._id = id
+        self._hit_key_function = hit_key_function or _hit_key_func
+        self._items = OrderedDict()
+        self._description = None
+        self.__alt_hit_ids = {}
+        self.program = ""
+        self.target = ""
+        self.version = ""
+
+        # validate Hit objects and fill up self._items
+        for hit in hits:
+            # validation is handled by __setitem__
+            self.append(hit)
+
+    def __iter__(self):
+        """Iterate over hits."""
+        return iter(self.hits)
+
+    @property
+    def hits(self):
+        """Hit objects contained in the QueryResult."""
+        return list(self._items.values())
+
+    @property
+    def hit_keys(self):
+        """Hit IDs of the Hit objects contained in the QueryResult."""
+        return list(self._items.keys())
+
+    @property
+    def items(self):
+        """List of tuples of Hit IDs and Hit objects."""
+        return list(self._items.items())
+
+    def iterhits(self):
+        """Return an iterator over the Hit objects."""
+        yield from self._items.values()
+
+    def iterhit_keys(self):
+        """Return an iterator over the ID of the Hit objects."""
+        yield from self._items
+
+    def iteritems(self):
+        """Return an iterator yielding tuples of Hit ID and Hit objects."""
+        yield from self._items.items()
+
+    def __contains__(self, hit_key):
+        """Return True if hit key in items or alternative hit identifiers."""
+        if isinstance(hit_key, Hit):
+            return self._hit_key_function(hit_key) in self._items
+        return hit_key in self._items or hit_key in self.__alt_hit_ids
+
+    def __len__(self):
+        """Return the number of items."""
+        return len(self._items)
+
+    def __bool__(self):
+        """Return True if there are items."""
+        return bool(self._items)
+
+    def __repr__(self):
+        """Return string representation of the QueryResult object."""
+        return "QueryResult(id=%r, %r hits)" % (self.id, len(self))
+
+    def __str__(self):
+        """Return a human readable summary of the QueryResult object."""
+        lines = []
+
+        # set program and version line
+        lines.append("Program: %s (%s)" % (self.program, self.version))
+
+        # set query id line
+        qid_line = "  Query: %s" % self.id
+        try:
+            seq_len = self.seq_len
+        except AttributeError:
+            pass
+        else:
+            qid_line += " (%i)" % seq_len
+        lines.append(qid_line)
+        if self.description:
+            line = "         %s" % self.description
+            line = line[:77] + "..." if len(line) > 80 else line
+            lines.append(line)
+
+        # set target line
+        lines.append(" Target: %s" % self.target)
+
+        # set hit lines
+        if not self.hits:
+            lines.append("   Hits: 0")
+        else:
+            lines.append("   Hits: %s  %s  %s" % ("-" * 4, "-" * 5, "-" * 58))
+            pattern = "%13s  %5s  %s"
+            lines.append(pattern % ("#", "# HSP", "ID + description"))
+            lines.append(pattern % ("-" * 4, "-" * 5, "-" * 58))
+            for idx, hit in enumerate(self.hits):
+                if idx < 30:
+                    hid_line = "%s  %s" % (hit.id, hit.description)
+                    if len(hid_line) > 58:
+                        hid_line = hid_line[:55] + "..."
+                    lines.append(pattern % (idx, len(hit), hid_line))
+                elif idx > len(self.hits) - 4:
+                    hid_line = "%s  %s" % (hit.id, hit.description)
+                    if len(hid_line) > 58:
+                        hid_line = hid_line[:55] + "..."
+                    lines.append(pattern % (idx, len(hit), hid_line))
+                elif idx == 30:
+                    lines.append("%14s" % "~~~")
+
+        return "\n".join(lines)
+
+    def __getitem__(self, hit_key):
+        """Return a QueryResult object that matches the hit_key."""
+        # retrieval using slice objects returns another QueryResult object
+        if isinstance(hit_key, slice):
+            # should we return just a list of Hits instead of a full blown
+            # QueryResult object if it's a slice?
+            hits = list(self.hits)[hit_key]
+            obj = self.__class__(hits, self.id, self._hit_key_function)
+            self._transfer_attrs(obj)
+            return obj
+
+        # if key is an int, then retrieve the Hit at the int index
+        elif isinstance(hit_key, int):
+            length = len(self)
+            if 0 <= hit_key < length:
+                for idx, item in enumerate(self.iterhits()):
+                    if idx == hit_key:
+                        return item
+            elif -1 * length <= hit_key < 0:
+                for idx, item in enumerate(self.iterhits()):
+                    if length + hit_key == idx:
+                        return item
+            raise IndexError("list index out of range")
+
+        # if key is a string, then do a regular dictionary retrieval
+        # falling back on alternative hit IDs
+        try:
+            return self._items[hit_key]
+        except KeyError:
+            return self._items[self.__alt_hit_ids[hit_key]]
+
+    def __setitem__(self, hit_key, hit):
+        """Add an item of key hit_key and value hit."""
+        # only accept string keys
+        if not isinstance(hit_key, str):
+            raise TypeError("QueryResult object keys must be strings.")
+        # hit must be a Hit object
+        if not isinstance(hit, Hit):
+            raise TypeError("QueryResult objects can only contain Hit objects.")
+        qid = self.id
+        hqid = hit.query_id
+        # and it must have the same query ID as this object's ID, unless the
+        # query ID is None (the default for empty objects), in which case we
+        # use the hit's query ID as the query ID
+        if qid is not None:
+            if hqid != qid:
+                raise ValueError(
+                    "Expected Hit with query ID %r, found %r instead." % (qid, hqid)
+                )
+        else:
+            self.id = hqid
+        # same thing with descriptions
+        qdesc = self.description
+        hqdesc = hit.query_description
+        if qdesc is not None:
+            if hqdesc != qdesc:
+                raise ValueError(
+                    "Expected Hit with query description %r, found %r instead."
+                    % (qdesc, hqdesc)
+                )
+        else:
+            self.description = hqdesc
+
+        # remove existing alt_id references, if hit_key already exists
+        if hit_key in self._items:
+            for alt_key in self._items[hit_key].id_all[1:]:
+                del self.__alt_hit_ids[alt_key]
+
+        # if hit_key is already present as an alternative ID
+        # delete it from the alternative ID dict
+        if hit_key in self.__alt_hit_ids:
+            del self.__alt_hit_ids[hit_key]
+
+        self._items[hit_key] = hit
+        for alt_id in hit.id_all[1:]:
+            self.__alt_hit_ids[alt_id] = hit_key
+
+    def __delitem__(self, hit_key):
+        """Delete item of key hit_key."""
+        # if hit_key an integer or slice, get the corresponding key first
+        # and put it into a list
+        if isinstance(hit_key, int):
+            hit_keys = [list(self.hit_keys)[hit_key]]
+        # the same, if it's a slice
+        elif isinstance(hit_key, slice):
+            hit_keys = list(self.hit_keys)[hit_key]
+        # otherwise put it in a list
+        else:
+            hit_keys = [hit_key]
+
+        for key in hit_keys:
+            deleted = False
+            if key in self._items:
+                del self._items[key]
+                deleted = True
+            if key in self.__alt_hit_ids:
+                del self._items[self.__alt_hit_ids[key]]
+                del self.__alt_hit_ids[key]
+                deleted = True
+            if not deleted:
+                raise KeyError(repr(key))
+
+    # properties #
+    id = optionalcascade("_id", "query_id", """QueryResult ID string""")
+    description = optionalcascade(
+        "_description", "query_description", """QueryResult description"""
+    )
+
+    @property
+    def hsps(self):
+        """Access the HSP objects contained in the QueryResult."""
+        return sorted(
+            (hsp for hsp in chain(*self.hits)), key=lambda hsp: hsp.output_index
+        )
+
+    @property
+    def fragments(self):
+        """Access the HSPFragment objects contained in the QueryResult."""
+        return list(chain(*self.hsps))
+
+    # public methods #
+    def absorb(self, hit):
+        """Add a Hit object to the end of QueryResult.
+
+        If the QueryResult already has a Hit with the same ID, append the new
+        Hit's HSPs into the existing Hit.
+
+        :param hit: object to absorb
+        :type hit: Hit
+
+        This method is used for file formats that may output the same Hit in
+        separate places, such as BLAT or Exonerate. In both formats, Hits
+        with different strands are put in different places. However, SearchIO
+        considers them to be the same, since a Hit object should contain all
+        database entries with the same ID, regardless of strand orientation.
+
+        """
+        try:
+            self.append(hit)
+        except ValueError:
+            assert hit.id in self
+            for hsp in hit:
+                self[hit.id].append(hsp)
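+
+    # Usage sketch (hypothetical Hit objects): appending a Hit whose ID is
+    # already present raises ValueError, while ``absorb`` merges instead:
+    #
+    #     qresult.absorb(plus_strand_hit)   # stored as a new Hit
+    #     qresult.absorb(minus_strand_hit)  # same ID: its HSPs are appended
+    #                                       # to the existing Hit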
+
+    def append(self, hit):
+        """Add a Hit object to the end of QueryResult.
+
+        :param hit: object to append
+        :type hit: Hit
+
+        Any Hit object appended must have the same ``query_id`` property as the
+        QueryResult's ``id`` property. If the hit key already exists, a
+        ``ValueError`` will be raised.
+
+        """
+        # if a custom hit_key_function is supplied, use it to define the hit key
+        if self._hit_key_function is not None:
+            hit_key = self._hit_key_function(hit)
+        else:
+            hit_key = hit.id
+
+        if hit_key not in self and all(pid not in self for pid in hit.id_all[1:]):
+            self[hit_key] = hit
+        else:
+            raise ValueError(
+                "The ID or alternative IDs of Hit %r exists in this QueryResult."
+                % hit_key
+            )
+
+    def hit_filter(self, func=None):
+        """Create new QueryResult object whose Hit objects pass the filter function.
+
+        :param func: filter function
+        :type func: callable, accepts Hit, returns bool
+
+        Here is an example of using ``hit_filter`` to select Hits whose
+        description begins with the string 'Homo sapiens', case sensitive::
+
+            >>> from Bio import SearchIO
+            >>> qresult = next(SearchIO.parse('Blast/mirna.xml', 'blast-xml'))
+            >>> def desc_filter(hit):
+            ...     return hit.description.startswith('Homo sapiens')
+            ...
+            >>> len(qresult)
+            100
+            >>> filtered = qresult.hit_filter(desc_filter)
+            >>> len(filtered)
+            39
+            >>> print(filtered[:4])
+            Program: blastn (2.2.27+)
+              Query: 33211 (61)
+                     mir_1
+             Target: refseq_rna
+               Hits: ----  -----  ----------------------------------------------------------
+                        #  # HSP  ID + description
+                     ----  -----  ----------------------------------------------------------
+                        0      1  gi|262205317|ref|NR_030195.1|  Homo sapiens microRNA 52...
+                        1      2  gi|262205330|ref|NR_030198.1|  Homo sapiens microRNA 52...
+                        2      1  gi|262205302|ref|NR_030191.1|  Homo sapiens microRNA 51...
+                        3      1  gi|262205451|ref|NR_030222.1|  Homo sapiens microRNA 51...
+
+        Note that instance attributes (other than the hits) from the unfiltered
+        QueryResult are retained in the filtered object.
+
+            >>> qresult.program == filtered.program
+            True
+            >>> qresult.target == filtered.target
+            True
+
+        """
+        hits = list(filter(func, self.hits))
+        obj = self.__class__(hits, self.id, self._hit_key_function)
+        self._transfer_attrs(obj)
+        return obj
+
+    def hit_map(self, func=None):
+        """Create new QueryResult object, mapping the given function to its Hits.
+
+        :param func: map function
+        :type func: callable, accepts Hit, returns Hit
+
+        Here is an example of using ``hit_map`` with a function that discards all
+        HSPs in a Hit except for the first one::
+
+            >>> from Bio import SearchIO
+            >>> qresult = next(SearchIO.parse('Blast/mirna.xml', 'blast-xml'))
+            >>> print(qresult[:8])
+            Program: blastn (2.2.27+)
+              Query: 33211 (61)
+                     mir_1
+             Target: refseq_rna
+               Hits: ----  -----  ----------------------------------------------------------
+                        #  # HSP  ID + description
+                     ----  -----  ----------------------------------------------------------
+                        0      1  gi|262205317|ref|NR_030195.1|  Homo sapiens microRNA 52...
+                        1      1  gi|301171311|ref|NR_035856.1|  Pan troglodytes microRNA...
+                        2      1  gi|270133242|ref|NR_032573.1|  Macaca mulatta microRNA ...
+                        3      2  gi|301171322|ref|NR_035857.1|  Pan troglodytes microRNA...
+                        4      1  gi|301171267|ref|NR_035851.1|  Pan troglodytes microRNA...
+                        5      2  gi|262205330|ref|NR_030198.1|  Homo sapiens microRNA 52...
+                        6      1  gi|262205302|ref|NR_030191.1|  Homo sapiens microRNA 51...
+                        7      1  gi|301171259|ref|NR_035850.1|  Pan troglodytes microRNA...
+
+            >>> top_hsp = lambda hit: hit[:1]
+            >>> mapped_qresult = qresult.hit_map(top_hsp)
+            >>> print(mapped_qresult[:8])
+            Program: blastn (2.2.27+)
+              Query: 33211 (61)
+                     mir_1
+             Target: refseq_rna
+               Hits: ----  -----  ----------------------------------------------------------
+                        #  # HSP  ID + description
+                     ----  -----  ----------------------------------------------------------
+                        0      1  gi|262205317|ref|NR_030195.1|  Homo sapiens microRNA 52...
+                        1      1  gi|301171311|ref|NR_035856.1|  Pan troglodytes microRNA...
+                        2      1  gi|270133242|ref|NR_032573.1|  Macaca mulatta microRNA ...
+                        3      1  gi|301171322|ref|NR_035857.1|  Pan troglodytes microRNA...
+                        4      1  gi|301171267|ref|NR_035851.1|  Pan troglodytes microRNA...
+                        5      1  gi|262205330|ref|NR_030198.1|  Homo sapiens microRNA 52...
+                        6      1  gi|262205302|ref|NR_030191.1|  Homo sapiens microRNA 51...
+                        7      1  gi|301171259|ref|NR_035850.1|  Pan troglodytes microRNA...
+
+        """
+        hits = [deepcopy(hit) for hit in self.hits]
+        if func is not None:
+            hits = [func(x) for x in hits]
+        obj = self.__class__(hits, self.id, self._hit_key_function)
+        self._transfer_attrs(obj)
+        return obj
+
+    def hsp_filter(self, func=None):
+        """Create new QueryResult object whose HSP objects pass the filter function.
+
+        ``hsp_filter`` is the same as ``hit_filter``, except that it filters
+        directly on each HSP object in every Hit. If the filtering removes
+        all HSP objects in a given Hit, the entire Hit will be discarded. This
+        will result in the QueryResult having fewer Hit objects after filtering.
+        """
+        hits = [x for x in (hit.filter(func) for hit in self.hits) if x]
+        obj = self.__class__(hits, self.id, self._hit_key_function)
+        self._transfer_attrs(obj)
+        return obj
+
+    def hsp_map(self, func=None):
+        """Create new QueryResult object, mapping the given function to its HSPs.
+
+        ``hsp_map`` is the same as ``hit_map``, except that it applies the given
+        function to all HSP objects in every Hit, instead of the Hit objects.
+        """
+        hits = [x for x in (hit.map(func) for hit in list(self.hits)[:]) if x]
+        obj = self.__class__(hits, self.id, self._hit_key_function)
+        self._transfer_attrs(obj)
+        return obj
+
+    # marker for default self.pop() return value
+    # this method is adapted from Python's built in OrderedDict.pop
+    # implementation
+    __marker = object()
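+    # The sentinel lets ``pop`` distinguish "no default supplied" from an
+    # explicit ``default=None``, mirroring OrderedDict.pop.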
+
+    def pop(self, hit_key=-1, default=__marker):
+        """Remove the specified hit key and return the Hit object.
+
+        :param hit_key: key of the Hit object to return
+        :type hit_key: int or string
+        :param default: return value if no Hit exists with the given key
+        :type default: object
+
+        By default, ``pop`` will remove and return the last Hit object in the
+        QueryResult object. To remove specific Hit objects, you can use its
+        integer index or hit key.
+
+            >>> from Bio import SearchIO
+            >>> qresult = next(SearchIO.parse('Blast/mirna.xml', 'blast-xml'))
+            >>> len(qresult)
+            100
+            >>> for hit in qresult[:5]:
+            ...     print(hit.id)
+            ...
+            gi|262205317|ref|NR_030195.1|
+            gi|301171311|ref|NR_035856.1|
+            gi|270133242|ref|NR_032573.1|
+            gi|301171322|ref|NR_035857.1|
+            gi|301171267|ref|NR_035851.1|
+
+            # remove the last hit
+            >>> qresult.pop()
+            Hit(id='gi|397513516|ref|XM_003827011.1|', query_id='33211', 1 hsps)
+
+            # remove the first hit
+            >>> qresult.pop(0)
+            Hit(id='gi|262205317|ref|NR_030195.1|', query_id='33211', 1 hsps)
+
+            # remove hit with the given ID
+            >>> qresult.pop('gi|301171322|ref|NR_035857.1|')
+            Hit(id='gi|301171322|ref|NR_035857.1|', query_id='33211', 2 hsps)
+
+        """
+        # if key is an integer (index)
+        # get the ID for the Hit object at that index
+        if isinstance(hit_key, int):
+            # raise the appropriate error if there is no hit
+            if not self:
+                raise IndexError("pop from empty list")
+            hit_key = list(self.hit_keys)[hit_key]
+
+        try:
+            hit = self._items.pop(hit_key)
+            # remove all alternative IDs of the popped hit
+            for alt_id in hit.id_all[1:]:
+                try:
+                    del self.__alt_hit_ids[alt_id]
+                except KeyError:
+                    pass
+            return hit
+        except KeyError:
+            if hit_key in self.__alt_hit_ids:
+                return self.pop(self.__alt_hit_ids[hit_key], default)
+            # if key doesn't exist and no default is set, raise a KeyError
+            if default is self.__marker:
+                raise KeyError(hit_key) from None
+        # if key doesn't exist but a default is set, return the default value
+        return default
+
+    def index(self, hit_key):
+        """Return the index of a given hit key, zero-based.
+
+        :param hit_key: hit ID
+        :type hit_key: string
+
+        This method is useful for finding out the integer index (usually
+        correlated with search rank) of a given hit key.
+
+            >>> from Bio import SearchIO
+            >>> qresult = next(SearchIO.parse('Blast/mirna.xml', 'blast-xml'))
+            >>> qresult.index('gi|301171259|ref|NR_035850.1|')
+            7
+
+        """
+        if isinstance(hit_key, Hit):
+            return list(self.hit_keys).index(hit_key.id)
+        try:
+            return list(self.hit_keys).index(hit_key)
+        except ValueError:
+            if hit_key in self.__alt_hit_ids:
+                return self.index(self.__alt_hit_ids[hit_key])
+            raise
+
+    def sort(self, key=None, reverse=False, in_place=True):
+        """Sort the Hit objects.
+
+        :param key: sorting function
+        :type key: callable, accepts Hit, returns key for sorting
+        :param reverse: whether to reverse the sorting results or not
+        :type reverse: bool
+        :param in_place: whether to sort in place or not
+        :type in_place: bool
+
+        ``sort`` defaults to sorting in place, to mimic Python's ``list.sort``
+        method. If you set the ``in_place`` argument to False, it will instead
+        return a new, sorted QueryResult object and keep the initial one
+        unsorted.
+
+        """
+        if key is None:
+            # if reverse is True, reverse the hits
+            if reverse:
+                sorted_hits = list(self.hits)[::-1]
+            # otherwise (default options) make a copy of the hits
+            else:
+                sorted_hits = list(self.hits)[:]
+        else:
+            sorted_hits = sorted(self.hits, key=key, reverse=reverse)
+
+        # if sorting is in-place, don't create a new QueryResult object
+        if in_place:
+            new_hits = OrderedDict()
+            for hit in sorted_hits:
+                new_hits[self._hit_key_function(hit)] = hit
+            self._items = new_hits
+        # otherwise, return a new sorted QueryResult object
+        else:
+            obj = self.__class__(sorted_hits, self.id, self._hit_key_function)
+            self._transfer_attrs(obj)
+            return obj
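+
+    # Usage sketch (assumed data): sort hits by decreasing HSP count without
+    # touching the original object:
+    #
+    #     by_hsps = qresult.sort(key=len, reverse=True, in_place=False)
+    #     [len(hit) for hit in by_hsps]   # e.g. [2, 2, 2, 1, ...]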
+
+
+def _hit_key_func(hit):
+    """Map hit to its identifier (PRIVATE).
+
+    Default hit key function for QueryResult.__init__ use.
+    """
+    return hit.id
+
+
+# if not used as a module, run the doctest
+if __name__ == "__main__":
+    from Bio._utils import run_doctest
+
+    run_doctest()
diff --git a/code/lib/Bio/SearchIO/_utils.py b/code/lib/Bio/SearchIO/_utils.py
new file mode 100644
index 0000000..3d801ab
--- /dev/null
+++ b/code/lib/Bio/SearchIO/_utils.py
@@ -0,0 +1,167 @@
+# Copyright 2012 by Wibowo Arindrarto.  All rights reserved.
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Common SearchIO utility functions."""
+
+
+def getattr_str(obj, attr, fmt=None, fallback="?"):
+    """Return string of the given object's attribute.
+
+    Defaults to the given fallback value if attribute is not present.
+    """
+    try:
+        value = getattr(obj, attr)
+    except AttributeError:
+        return fallback
+    if fmt is None:
+        return str(value)
+    return fmt % value
+
+
+def read_forward(handle):
+    """Read through whitespaces, return the first non-whitespace line."""
+    while True:
+        line = handle.readline()
+        # if line is empty or line has characters and stripping does not remove
+        # them, return the line
+        if (not line) or (line and line.strip()):
+            return line
+
+
+def get_processor(format, mapping):
+    """Return the object to process the given format according to the mapping.
+
+    :param format: format name
+    :type format: string, lower case
+    :param mapping: mapping of format name and its processor object
+    :type mapping: dictionary {string: object}
+
+    """
+    # map file format to iterator name
+    try:
+        obj_info = mapping[format]
+    except KeyError:
+        # handle the errors with helpful messages
+        if format is None:
+            raise ValueError("Format required (lower case string)") from None
+        elif not isinstance(format, str):
+            raise TypeError("Need a string for the file format (lower case)") from None
+        elif format != format.lower():
+            raise ValueError("Format string %r should be lower case" % format) from None
+        else:
+            raise ValueError(
+                "Unknown format %r. Supported formats are %r"
+                % (format, "', '".join(mapping))
+            ) from None
+
+    mod_name, obj_name = obj_info
+    mod = __import__("Bio.SearchIO.%s" % mod_name, fromlist=[""])
+
+    return getattr(mod, obj_name)
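+
+# Usage sketch (the mapping shown here is assumed): SearchIO's ``parse`` and
+# ``read`` call this with tables such as
+# {"blast-xml": ("BlastIO", "BlastXmlParser")}, so
+# get_processor("blast-xml", mapping) imports Bio.SearchIO.BlastIO and
+# returns its BlastXmlParser attribute.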
+
+
+def singleitem(attr=None, doc=""):
+    """Property for fetching attribute from first entry of container.
+
+    Returns a property that fetches the given attribute from
+    the first item in a SearchIO container object.
+    """
+
+    def getter(self):
+        if len(self._items) > 1:
+            raise ValueError("More than one HSPFragment object found in HSP")
+        if attr is None:
+            return self._items[0]
+        return getattr(self._items[0], attr)
+
+    return property(fget=getter, doc=doc)
+
+
+def allitems(attr=None, doc=""):
+    """Property for fetching attribute from all entries of container.
+
+    Returns a property that fetches the given attributes from
+    all items in a SearchIO container object.
+    """
+
+    def getter(self):
+        if attr is None:
+            return self._items
+        return [getattr(frag, attr) for frag in self._items]
+
+    return property(fget=getter, doc=doc)
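+
+
+# Typical wiring of the two property factories above (a sketch; this ``HSP``
+# stand-in is not the real Bio.SearchIO HSP class):
+#
+#     class HSP:
+#         def __init__(self, fragments):
+#             self._items = fragments
+#
+#         fragment = singleitem(doc="The lone HSPFragment in this HSP")
+#         query_all = allitems("query", doc="Query sequence of each fragment")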
+
+
+def fullcascade(attr, doc=""):
+    """Return a getter property with a cascading setter.
+
+    This is similar to ``optionalcascade``, but for SearchIO containers that have
+    at least one item (HSP). The getter always retrieves the attribute
+    value from the first item. If the items have more than one attribute value,
+    an error will be raised. The setter behaves like the one returned by
+    ``optionalcascade``, except that it only sets attributes on the items in the
+    object, not on the object itself.
+
+    """
+
+    def getter(self):
+        return getattr(self._items[0], attr)
+
+    def setter(self, value):
+        for item in self:
+            setattr(item, attr, value)
+
+    return property(fget=getter, fset=setter, doc=doc)
+
+
+def optionalcascade(cont_attr, item_attr, doc=""):
+    """Return a getter property with a cascading setter.
+
+    This is used for the ``id`` and ``description`` properties of the container
+    objects with zero or more items. These items have their own private
+    attributes that stores query and/or hit ID and description. When the
+    container has zero items, attribute values are always retrieved from the
+    container's attribute. Otherwise, the first item's attribute is used.
+
+    To keep the container items' query and/or hit ID and description in-sync,
+    the setter cascades any new value given to the items' values.
+
+    """
+
+    def getter(self):
+        if self._items:
+            # don't use self._items here, so QueryResult can use this property
+            # as well (the underlying OrderedDict is not integer-indexable)
+            return getattr(self[0], item_attr)
+        else:
+            return getattr(self, cont_attr)
+
+    def setter(self, value):
+        setattr(self, cont_attr, value)
+        for item in self:
+            setattr(item, item_attr, value)
+
+    return property(fget=getter, fset=setter, doc=doc)
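+
+
+# Sketch of optionalcascade in use (illustrative; ``Container`` is a
+# stand-in for a SearchIO container such as QueryResult, not the real class):
+#
+#     class Container:
+#         def __init__(self, items, id=None):
+#             self._items = items
+#             self._id = id
+#
+#         def __iter__(self):
+#             return iter(self._items)
+#
+#         def __getitem__(self, index):
+#             return self._items[index]
+#
+#         id = optionalcascade("_id", "query_id", doc="Query identifier")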
+
+
+def fragcascade(attr, seq_type, doc=""):
+    """Return a getter property with cascading setter, for HSPFragment objects.
+
+    Similar to ``optionalcascade``, but for HSPFragment objects; acts on the
+    ``query`` or ``hit`` property of the object if it is not None.
+
+    """
+    assert seq_type in ("hit", "query")
+    attr_name = "_%s_%s" % (seq_type, attr)
+
+    def getter(self):
+        return getattr(self, attr_name)
+
+    def setter(self, value):
+        setattr(self, attr_name, value)
+        seq = getattr(self, seq_type)
+        if seq is not None:
+            setattr(seq, attr, value)
+
+    return property(fget=getter, fset=setter, doc=doc)
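+
+
+# For instance, an HSPFragment-style class can expose ``hit_id`` so that
+# assigning to it also updates the underlying hit SeqRecord (a sketch, not
+# the actual Bio.SearchIO._model code):
+#
+#     class Fragment:
+#         def __init__(self, hit=None):
+#             self._hit_id = None
+#             self.hit = hit
+#
+#         hit_id = fragcascade("id", "hit", doc="Hit sequence ID")
+#
+#     frag = Fragment(hit=some_seqrecord)   # hypothetical SeqRecord
+#     frag.hit_id = "hit1"                  # also sets some_seqrecord.id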
diff --git a/code/lib/Bio/Seq.py b/code/lib/Bio/Seq.py
new file mode 100644
index 0000000..2b47b5a
--- /dev/null
+++ b/code/lib/Bio/Seq.py
@@ -0,0 +1,3223 @@
+# Copyright 2000 Andrew Dalke.
+# Copyright 2000-2002 Brad Chapman.
+# Copyright 2004-2005, 2010 by M de Hoon.
+# Copyright 2007-2020 by Peter Cock.
+# All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Provide objects to represent biological sequences.
+
+See also the Seq_ wiki and the chapter in our tutorial:
+ - `HTML Tutorial`_
+ - `PDF Tutorial`_
+
+.. _Seq: http://biopython.org/wiki/Seq
+.. _`HTML Tutorial`: http://biopython.org/DIST/docs/tutorial/Tutorial.html
+.. _`PDF Tutorial`: http://biopython.org/DIST/docs/tutorial/Tutorial.pdf
+
+"""
+import array
+import warnings
+
+from abc import ABC
+from abc import abstractmethod
+
+from Bio import BiopythonDeprecationWarning
+from Bio import BiopythonWarning
+from Bio.Data import CodonTable
+from Bio.Data import IUPACData
+
+
+def _maketrans(complement_mapping):
+    """Make a python string translation table (PRIVATE).
+
+    Arguments:
+     - complement_mapping - a dictionary such as ambiguous_dna_complement
+       and ambiguous_rna_complement from Data.IUPACData.
+
+    Returns a translation table (a string of length 256) for use with the
+    python string's translate method to use in a (reverse) complement.
+
+    Compatible with lower case and upper case sequences.
+
+    For internal use only.
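+
+    Illustrative doctest, using a minimal unambiguous DNA mapping instead of
+    the full IUPAC dictionaries:
+
+    >>> table = _maketrans({"A": "T", "T": "A", "C": "G", "G": "C"})
+    >>> b"ACGTacgt".translate(table)
+    b'TGCAtgca'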
+    """
+    keys = "".join(complement_mapping.keys()).encode("ASCII")
+    values = "".join(complement_mapping.values()).encode("ASCII")
+    return bytes.maketrans(keys + keys.lower(), values + values.lower())
+
+
+_dna_complement_table = _maketrans(IUPACData.ambiguous_dna_complement)
+ambiguous_rna_complement = dict(IUPACData.ambiguous_rna_complement)
+ambiguous_rna_complement["T"] = ambiguous_rna_complement["U"]
+_rna_complement_table = _maketrans(ambiguous_rna_complement)
+del ambiguous_rna_complement
+
+
+class SequenceDataAbstractBaseClass(ABC):
+    """Abstract base class for sequence content providers.
+
+    Most users will not need to use this class. It is used internally as a base
+    class for sequence content provider classes such as _UndefinedSequenceData
+    defined in this module, and _TwoBitSequenceData in Bio.SeqIO.TwoBitIO.
+    Instances of these classes can be used instead of a ``bytes`` object as the
+    data argument when creating a Seq object, and provide the sequence content
+    only when requested via ``__getitem__``. This allows lazy parsers to load
+    and parse sequence data from a file only for the requested sequence regions,
+    and _UndefinedSequenceData instances to raise an exception when undefined
+    sequence data are requested.
+
+    Future implementations of lazy parsers that similarly provide on-demand
+    parsing of sequence data should use a subclass of this abstract class and
+    implement the abstract methods ``__len__`` and ``__getitem__``:
+
+    * ``__len__`` must return the sequence length;
+    * ``__getitem__`` must return
+
+      * a ``bytes`` object for the requested region; or
+      * a new instance of the subclass for the requested region; or
+      * raise an ``UndefinedSequenceError``.
+
+      Calling ``__getitem__`` for a sequence region of size zero should always
+      return an empty ``bytes`` object.
+      Calling ``__getitem__`` for the full sequence (as in data[:]) should
+      either return a ``bytes`` object with the full sequence, or raise an
+      ``UndefinedSequenceError``.
+
+    Subclasses of SequenceDataAbstractBaseClass must call ``super().__init__()``
+    as part of their ``__init__`` method.
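+
+    A minimal sketch of a conforming subclass (illustrative only; it handles
+    slice access, which is all that ``bytes(self)`` requires):
+
+    >>> class _ConstantSequenceData(SequenceDataAbstractBaseClass):
+    ...     def __init__(self, letter, length):
+    ...         self.letter = letter
+    ...         self.length = length
+    ...         super().__init__()
+    ...     def __len__(self):
+    ...         return self.length
+    ...     def __getitem__(self, key):
+    ...         start, stop, step = key.indices(self.length)
+    ...         return self.letter * len(range(start, stop, step))
+    >>> bytes(_ConstantSequenceData(b"N", 5))
+    b'NNNNN'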
+    """
+
+    __slots__ = ()
+
+    def __init__(self):
+        """Check if ``__getitem__`` returns a bytes-like object."""
+        assert self[:0] == b""
+
+    @abstractmethod
+    def __len__(self):
+        pass
+
+    @abstractmethod
+    def __getitem__(self, key):
+        pass
+
+    def __bytes__(self):
+        return self[:]
+
+    def __hash__(self):
+        return hash(bytes(self))
+
+    def __eq__(self, other):
+        return bytes(self) == other
+
+    def __lt__(self, other):
+        return bytes(self) < other
+
+    def __le__(self, other):
+        return bytes(self) <= other
+
+    def __gt__(self, other):
+        return bytes(self) > other
+
+    def __ge__(self, other):
+        return bytes(self) >= other
+
+    def __add__(self, other):
+        return bytes(self) + other
+
+    def __radd__(self, other):
+        return other + bytes(self)
+
+    def __mul__(self, other):
+        return bytes(self) * other
+
+    def __contains__(self, item):
+        return bytes(self).__contains__(item)
+
+    def decode(self, encoding="utf-8"):
+        """Decode the data as bytes using the codec registered for encoding.
+
+        encoding
+          The encoding with which to decode the bytes.
+        """
+        return bytes(self).decode(encoding)
+
+    def count(self, sub, start=None, end=None):
+        """Return the number of non-overlapping occurrences of sub in data[start:end].
+
+        Optional arguments start and end are interpreted as in slice notation.
+        """
+        return bytes(self).count(sub, start, end)
+
+    def find(self, sub, start=None, end=None):
+        """Return the lowest index in data where subsection sub is found.
+
+        Return the lowest index in data where subsection sub is found,
+        such that sub is contained within data[start:end].  Optional
+        arguments start and end are interpreted as in slice notation.
+
+        Return -1 on failure.
+        """
+        return bytes(self).find(sub, start, end)
+
+    def rfind(self, sub, start=None, end=None):
+        """Return the highest index in data where subsection sub is found.
+
+        Return the highest index in data where subsection sub is found,
+        such that sub is contained within data[start:end].  Optional
+        arguments start and end are interpreted as in slice notation.
+
+        Return -1 on failure.
+        """
+        return bytes(self).rfind(sub, start, end)
+
+    def index(self, sub, start=None, end=None):
+        """Return the lowest index in data where subsection sub is found.
+
+        Return the lowest index in data where subsection sub is found,
+        such that sub is contained within data[start:end].  Optional
+        arguments start and end are interpreted as in slice notation.
+
+        Raises ValueError when the subsection is not found.
+        """
+        return bytes(self).index(sub, start, end)
+
+    def rindex(self, sub, start=None, end=None):
+        """Return the highest index in data where subsection sub is found.
+
+        Return the highest index in data where subsection sub is found,
+        such that sub is contained within data[start:end].  Optional
+        arguments start and end are interpreted as in slice notation.
+
+        Raise ValueError when the subsection is not found.
+        """
+        return bytes(self).rindex(sub, start, end)
+
+    def startswith(self, prefix, start=None, end=None):
+        """Return True if data starts with the specified prefix, False otherwise.
+
+        With optional start, test data beginning at that position.
+        With optional end, stop comparing data at that position.
+        prefix can also be a tuple of bytes to try.
+        """
+        return bytes(self).startswith(prefix, start, end)
+
+    def endswith(self, suffix, start=None, end=None):
+        """Return True if data ends with the specified suffix, False otherwise.
+
+        With optional start, test data beginning at that position.
+        With optional end, stop comparing data at that position.
+        suffix can also be a tuple of bytes to try.
+        """
+        return bytes(self).endswith(suffix, start, end)
+
+    def split(self, sep=None, maxsplit=-1):
+        """Return a list of the sections in the data, using sep as the delimiter.
+
+        sep
+          The delimiter according to which the data is split.
+          None (the default value) means split on ASCII whitespace characters
+          (space, tab, return, newline, formfeed, vertical tab).
+        maxsplit
+          Maximum number of splits to do.
+          -1 (the default value) means no limit.
+        """
+        return bytes(self).split(sep, maxsplit)
+
+    def rsplit(self, sep=None, maxsplit=-1):
+        """Return a list of the sections in the data, using sep as the delimiter.
+
+        sep
+          The delimiter according to which the data is split.
+          None (the default value) means split on ASCII whitespace characters
+          (space, tab, return, newline, formfeed, vertical tab).
+        maxsplit
+          Maximum number of splits to do.
+          -1 (the default value) means no limit.
+
+        Splitting is done starting at the end of the data and working to the front.
+        """
+        return bytes(self).rsplit(sep, maxsplit)
+
+    def strip(self, chars=None):
+        """Strip leading and trailing characters contained in the argument.
+
+        If the argument is omitted or None, strip leading and trailing ASCII whitespace.
+        """
+        return bytes(self).strip(chars)
+
+    def lstrip(self, chars=None):
+        """Strip leading characters contained in the argument.
+
+        If the argument is omitted or None, strip leading ASCII whitespace.
+        """
+        return bytes(self).lstrip(chars)
+
+    def rstrip(self, chars=None):
+        """Strip trailing characters contained in the argument.
+
+        If the argument is omitted or None, strip trailing ASCII whitespace.
+        """
+        return bytes(self).rstrip(chars)
+
+    def upper(self):
+        """Return a copy of data with all ASCII characters converted to uppercase."""
+        return bytes(self).upper()
+
+    def lower(self):
+        """Return a copy of data with all ASCII characters converted to lowercase."""
+        return bytes(self).lower()
+
+    def replace(self, old, new):
+        """Return a copy with all occurrences of substring old replaced by new."""
+        return bytes(self).replace(old, new)
+
+    def translate(self, table, delete=b""):
+        """Return a copy with each character mapped by the given translation table.
+
+          table
+            Translation table, which must be a bytes object of length 256.
+
+        All characters occurring in the optional argument delete are removed.
+        The remaining characters are mapped through the given translation table.
+        """
+        return bytes(self).translate(table, delete)
+
+
+class _SeqAbstractBaseClass(ABC):
+    """Abstract base class for the Seq and MutableSeq classes (PRIVATE).
+
+    Most users will not need to use this class. It is used internally as an
+    abstract base class for Seq and MutableSeq, as most of their methods are
+    identical.
+    """
+
+    __slots__ = ("_data",)
+
+    @abstractmethod
+    def __init__(self):
+        pass
+
+    def __bytes__(self):
+        return bytes(self._data)
+
+    def __repr__(self):
+        """Return (truncated) representation of the sequence."""
+        data = self._data
+        if isinstance(data, _UndefinedSequenceData):
+            return f"Seq(None, length={len(self)})"
+        if len(data) > 60:
+            # Shows the last three letters as it is often useful to see if
+            # there is a stop codon at the end of a sequence.
+            # Note total length is 54+3+3=60
+            start = data[:54].decode("ASCII")
+            end = data[-3:].decode("ASCII")
+            return f"{self.__class__.__name__}('{start}...{end}')"
+        else:
+            data = data.decode("ASCII")
+            return f"{self.__class__.__name__}('{data}')"
+
+    def __str__(self):
+        """Return the full sequence as a python string."""
+        return self._data.decode("ASCII")
+
+    def __eq__(self, other):
+        """Compare the sequence to another sequence or a string.
+
+        Sequences are equal to each other if their sequence contents are
+        identical:
+
+        >>> from Bio.Seq import Seq, MutableSeq
+        >>> seq1 = Seq("ACGT")
+        >>> seq2 = Seq("ACGT")
+        >>> mutable_seq = MutableSeq("ACGT")
+        >>> seq1 == seq2
+        True
+        >>> seq1 == mutable_seq
+        True
+        >>> seq1 == "ACGT"
+        True
+
+        Note that the sequence objects themselves are not identical to each
+        other:
+
+        >>> id(seq1) == id(seq2)
+        False
+        >>> seq1 is seq2
+        False
+
+        Sequences can also be compared to strings, ``bytes``, and ``bytearray``
+        objects:
+
+        >>> seq1 == "ACGT"
+        True
+        >>> seq1 == b"ACGT"
+        True
+        >>> seq1 == bytearray(b"ACGT")
+        True
+        """
+        if isinstance(other, _SeqAbstractBaseClass):
+            return self._data == other._data
+        elif isinstance(other, str):
+            return self._data == other.encode("ASCII")
+        else:
+            return self._data == other
+
+    def __lt__(self, other):
+        """Implement the less-than operand."""
+        if isinstance(other, _SeqAbstractBaseClass):
+            return self._data < other._data
+        elif isinstance(other, str):
+            return self._data < other.encode("ASCII")
+        else:
+            return self._data < other
+
+    def __le__(self, other):
+        """Implement the less-than or equal operand."""
+        if isinstance(other, _SeqAbstractBaseClass):
+            return self._data <= other._data
+        elif isinstance(other, str):
+            return self._data <= other.encode("ASCII")
+        else:
+            return self._data <= other
+
+    def __gt__(self, other):
+        """Implement the greater-than operand."""
+        if isinstance(other, _SeqAbstractBaseClass):
+            return self._data > other._data
+        elif isinstance(other, str):
+            return self._data > other.encode("ASCII")
+        else:
+            return self._data > other
+
+    def __ge__(self, other):
+        """Implement the greater-than or equal operand."""
+        if isinstance(other, _SeqAbstractBaseClass):
+            return self._data >= other._data
+        elif isinstance(other, str):
+            return self._data >= other.encode("ASCII")
+        else:
+            return self._data >= other
+
+    def __len__(self):
+        """Return the length of the sequence."""
+        return len(self._data)
+
+    def __getitem__(self, index):
+        """Return a subsequence as a single letter or as a sequence object.
+
+        If the index is an integer, a single letter is returned as a Python
+        string:
+
+        >>> seq = Seq('ACTCGACGTCG')
+        >>> seq[5]
+        'A'
+
+        Otherwise, a new sequence object of the same class is returned:
+
+        >>> seq[5:8]
+        Seq('ACG')
+        >>> mutable_seq = MutableSeq('ACTCGACGTCG')
+        >>> mutable_seq[5:8]
+        MutableSeq('ACG')
+        """
+        if isinstance(index, int):
+            # Return a single letter as a string
+            return chr(self._data[index])
+        else:
+            # Return the (sub)sequence as another Seq/MutableSeq object
+            return self.__class__(self._data[index])
+
+    def __add__(self, other):
+        """Add a sequence or string to this sequence.
+
+        >>> from Bio.Seq import Seq, MutableSeq
+        >>> Seq("MELKI") + "LV"
+        Seq('MELKILV')
+        >>> MutableSeq("MELKI") + "LV"
+        MutableSeq('MELKILV')
+        """
+        if isinstance(other, _SeqAbstractBaseClass):
+            return self.__class__(self._data + other._data)
+        elif isinstance(other, str):
+            return self.__class__(self._data + other.encode("ASCII"))
+
+        from Bio.SeqRecord import SeqRecord  # Lazy to avoid circular imports
+
+        if isinstance(other, SeqRecord):
+            # Get the SeqRecord's __radd__ to handle this
+            return NotImplemented
+        else:
+            raise TypeError
+
+    def __radd__(self, other):
+        """Add a sequence string on the left.
+
+        >>> from Bio.Seq import Seq, MutableSeq
+        >>> "LV" + Seq("MELKI")
+        Seq('LVMELKI')
+        >>> "LV" + MutableSeq("MELKI")
+        MutableSeq('LVMELKI')
+
+        Adding two sequence objects is handled via the __add__ method.
+        """
+        if isinstance(other, str):
+            return self.__class__(other.encode("ASCII") + self._data)
+        else:
+            raise TypeError
+
+    def __mul__(self, other):
+        """Multiply sequence by integer.
+
+        >>> from Bio.Seq import Seq, MutableSeq
+        >>> Seq('ATG') * 2
+        Seq('ATGATG')
+        >>> MutableSeq('ATG') * 2
+        MutableSeq('ATGATG')
+        """
+        if not isinstance(other, int):
+            raise TypeError(f"can't multiply {self.__class__.__name__} by non-int type")
+        return self.__class__(self._data * other)
+
+    def __rmul__(self, other):
+        """Multiply integer by sequence.
+
+        >>> from Bio.Seq import Seq
+        >>> 2 * Seq('ATG')
+        Seq('ATGATG')
+        """
+        if not isinstance(other, int):
+            raise TypeError(f"can't multiply {self.__class__.__name__} by non-int type")
+        return self.__class__(self._data * other)
+
+    def __imul__(self, other):
+        """Multiply the sequence object by other and assign.
+
+        >>> from Bio.Seq import Seq
+        >>> seq = Seq('ATG')
+        >>> seq *= 2
+        >>> seq
+        Seq('ATGATG')
+
+        Note that this is different from in-place multiplication. The ``seq``
+        variable is reassigned to the multiplication result, but any other
+        variable pointing to the original sequence object remains unchanged:
+
+        >>> seq = Seq('ATG')
+        >>> seq2 = seq
+        >>> id(seq) == id(seq2)
+        True
+        >>> seq *= 2
+        >>> seq
+        Seq('ATGATG')
+        >>> seq2
+        Seq('ATG')
+        >>> id(seq) == id(seq2)
+        False
+        """
+        if not isinstance(other, int):
+            raise TypeError(f"can't multiply {self.__class__.__name__} by non-int type")
+        return self.__class__(self._data * other)
+
+    def count(self, sub, start=None, end=None):
+        """Return a non-overlapping count, like that of a python string.
+
+        The number of occurrences of substring argument sub in the
+        (sub)sequence given by [start:end] is returned as an integer.
+        Optional arguments start and end are interpreted as in slice
+        notation.
+
+        Arguments:
+         - sub - a string or another Seq object to look for
+         - start - optional integer, slice start
+         - end - optional integer, slice end
+
+        e.g.
+
+        >>> from Bio.Seq import Seq
+        >>> my_seq = Seq("AAAATGA")
+        >>> print(my_seq.count("A"))
+        5
+        >>> print(my_seq.count("ATG"))
+        1
+        >>> print(my_seq.count(Seq("AT")))
+        1
+        >>> print(my_seq.count("AT", 2, -1))
+        1
+
+        HOWEVER, please note that because the ``count`` method of Seq and
+        MutableSeq objects, like that of Python strings, does a
+        non-overlapping search, this may not give the answer you expect:
+
+        >>> "AAAA".count("AA")
+        2
+        >>> print(Seq("AAAA").count("AA"))
+        2
+
+        For an overlapping search, use the ``count_overlap`` method:
+
+        >>> print(Seq("AAAA").count_overlap("AA"))
+        3
+        """
+        if isinstance(sub, MutableSeq):
+            sub = sub._data
+        elif isinstance(sub, Seq):
+            sub = bytes(sub)
+        elif isinstance(sub, str):
+            sub = sub.encode("ASCII")
+        elif not isinstance(sub, (bytes, bytearray)):
+            raise TypeError(
+                "a Seq, MutableSeq, str, bytes, or bytearray object is required, not '%s'"
+                % type(sub)
+            )
+        return self._data.count(sub, start, end)
+
+    def count_overlap(self, sub, start=None, end=None):
+        """Return an overlapping count.
+
+        Returns an integer, the number of occurrences of substring
+        argument sub in the (sub)sequence given by [start:end].
+        Optional arguments start and end are interpreted as in slice
+        notation.
+
+        Arguments:
+         - sub - a string or another Seq object to look for
+         - start - optional integer, slice start
+         - end - optional integer, slice end
+
+        e.g.
+
+        >>> from Bio.Seq import Seq
+        >>> print(Seq("AAAA").count_overlap("AA"))
+        3
+        >>> print(Seq("ATATATATA").count_overlap("ATA"))
+        4
+        >>> print(Seq("ATATATATA").count_overlap("ATA", 3, -1))
+        1
+
+        For a non-overlapping search, use the ``count`` method:
+
+        >>> print(Seq("AAAA").count("AA"))
+        2
+
+        Where substrings do not overlap, ``count_overlap`` behaves the same as
+        the ``count`` method:
+
+        >>> from Bio.Seq import Seq
+        >>> my_seq = Seq("AAAATGA")
+        >>> print(my_seq.count_overlap("A"))
+        5
+        >>> my_seq.count_overlap("A") == my_seq.count("A")
+        True
+        >>> print(my_seq.count_overlap("ATG"))
+        1
+        >>> my_seq.count_overlap("ATG") == my_seq.count("ATG")
+        True
+        >>> print(my_seq.count_overlap(Seq("AT")))
+        1
+        >>> my_seq.count_overlap(Seq("AT")) == my_seq.count(Seq("AT"))
+        True
+        >>> print(my_seq.count_overlap("AT", 2, -1))
+        1
+        >>> my_seq.count_overlap("AT", 2, -1) == my_seq.count("AT", 2, -1)
+        True
+
+        HOWEVER, do not use this method for such cases because the
+        count() method is much more efficient.
+        """
+        if isinstance(sub, MutableSeq):
+            sub = sub._data
+        elif isinstance(sub, Seq):
+            sub = bytes(sub)
+        elif isinstance(sub, str):
+            sub = sub.encode("ASCII")
+        elif not isinstance(sub, (bytes, bytearray)):
+            raise TypeError(
+                "a Seq, MutableSeq, str, bytes, or bytearray object is required, not '%s'"
+                % type(sub)
+            )
+        data = self._data
+        overlap_count = 0
+        while True:
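+            # bytes.find returns -1 when sub is absent; adding 1 maps that to
+            # 0 (the loop-exit sentinel) and otherwise moves start one past
+            # the match's first position, so overlapping matches are counted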
+            start = data.find(sub, start, end) + 1
+            if start != 0:
+                overlap_count += 1
+            else:
+                return overlap_count
+
+    def __contains__(self, item):
+        """Return True if item is a subsequence of the sequence, and False otherwise.
+
+        e.g.
+
+        >>> from Bio.Seq import Seq, MutableSeq
+        >>> my_dna = Seq("ATATGAAATTTGAAAA")
+        >>> "AAA" in my_dna
+        True
+        >>> Seq("AAA") in my_dna
+        True
+        >>> MutableSeq("AAA") in my_dna
+        True
+        """
+        if isinstance(item, _SeqAbstractBaseClass):
+            item = bytes(item)
+        elif isinstance(item, str):
+            item = item.encode("ASCII")
+        return item in self._data
+
+    def find(self, sub, start=None, end=None):
+        """Return the lowest index in the sequence where subsequence sub is found.
+
+        With optional arguments start and end, return the lowest index in the
+        sequence such that the subsequence sub is contained within the sequence
+        region [start:end].
+
+        Arguments:
+         - sub - a string or another Seq or MutableSeq object to search for
+         - start - optional integer, slice start
+         - end - optional integer, slice end
+
+        Returns -1 if the subsequence is NOT found.
+
+        e.g. Locating the first typical start codon, AUG, in an RNA sequence:
+
+        >>> from Bio.Seq import Seq
+        >>> my_rna = Seq("GUCAUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAGUUG")
+        >>> my_rna.find("AUG")
+        3
+
+        The next typical start codon can then be found by starting the search
+        at position 4:
+
+        >>> my_rna.find("AUG", 4)
+        15
+        """
+        if isinstance(sub, _SeqAbstractBaseClass):
+            sub = bytes(sub)
+        elif isinstance(sub, str):
+            sub = sub.encode("ASCII")
+        elif not isinstance(sub, (bytes, bytearray)):
+            raise TypeError(
+                "a Seq, MutableSeq, str, bytes, or bytearray object is required, not '%s'"
+                % type(sub)
+            )
+        return self._data.find(sub, start, end)
+
+    def rfind(self, sub, start=None, end=None):
+        """Return the highest index in the sequence where subsequence sub is found.
+
+        With optional arguments start and end, return the highest index in the
+        sequence such that the subsequence sub is contained within the sequence
+        region [start:end].
+
+        Arguments:
+         - sub - a string or another Seq or MutableSeq object to search for
+         - start - optional integer, slice start
+         - end - optional integer, slice end
+
+        Returns -1 if the subsequence is NOT found.
+
+        e.g. Locating the last typical start codon, AUG, in an RNA sequence:
+
+        >>> from Bio.Seq import Seq
+        >>> my_rna = Seq("GUCAUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAGUUG")
+        >>> my_rna.rfind("AUG")
+        15
+
+        The location of the typical start codon before that can be found by
+        ending the search at position 15:
+
+        >>> my_rna.rfind("AUG", end=15)
+        3
+        """
+        if isinstance(sub, _SeqAbstractBaseClass):
+            sub = bytes(sub)
+        elif isinstance(sub, str):
+            sub = sub.encode("ASCII")
+        elif not isinstance(sub, (bytes, bytearray)):
+            raise TypeError(
+                "a Seq, MutableSeq, str, bytes, or bytearray object is required, not '%s'"
+                % type(sub)
+            )
+        return self._data.rfind(sub, start, end)
+
+    def index(self, sub, start=None, end=None):
+        """Return the lowest index in the sequence where subsequence sub is found.
+
+        With optional arguments start and end, return the lowest index in the
+        sequence such that the subsequence sub is contained within the sequence
+        region [start:end].
+
+        Arguments:
+         - sub - a string or another Seq or MutableSeq object to search for
+         - start - optional integer, slice start
+         - end - optional integer, slice end
+
+        Raises a ValueError if the subsequence is NOT found.
+
+        e.g. Locating the first typical start codon, AUG, in an RNA sequence:
+
+        >>> from Bio.Seq import Seq
+        >>> my_rna = Seq("GUCAUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAGUUG")
+        >>> my_rna.index("AUG")
+        3
+
+        The next typical start codon can then be found by starting the search
+        at position 4:
+
+        >>> my_rna.index("AUG", 4)
+        15
+
+        This method performs the same search as the ``find`` method.  However,
+        if the subsequence is not found, ``find`` returns -1 whereas ``index``
+        raises a ValueError:
+
+        >>> my_rna.index("T")
+        Traceback (most recent call last):
+                   ...
+        ValueError: ...
+        >>> my_rna.find("T")
+        -1
+        """
+        if isinstance(sub, MutableSeq):
+            sub = sub._data
+        elif isinstance(sub, Seq):
+            sub = bytes(sub)
+        elif isinstance(sub, str):
+            sub = sub.encode("ASCII")
+        elif not isinstance(sub, (bytes, bytearray)):
+            raise TypeError(
+                "a Seq, MutableSeq, str, bytes, or bytearray object is required, not '%s'"
+                % type(sub)
+            )
+        return self._data.index(sub, start, end)
+
+    def rindex(self, sub, start=None, end=None):
+        """Return the highest index in the sequence where subsequence sub is found.
+
+        With optional arguments start and end, return the highest index in the
+        sequence such that the subsequence sub is contained within the sequence
+        region [start:end].
+
+        Arguments:
+         - sub - a string or another Seq or MutableSeq object to search for
+         - start - optional integer, slice start
+         - end - optional integer, slice end
+
+        Raises a ValueError if the subsequence is NOT found.
+
+        e.g. Locating the last typical start codon, AUG, in an RNA sequence:
+
+        >>> from Bio.Seq import Seq
+        >>> my_rna = Seq("GUCAUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAGUUG")
+        >>> my_rna.rindex("AUG")
+        15
+
+        The location of the typical start codon before that can be found by
+        ending the search at position 15:
+
+        >>> my_rna.rindex("AUG", end=15)
+        3
+
+        This method performs the same search as the ``rfind`` method.  However,
+        if the subsequence is not found, ``rfind`` returns -1 whereas ``rindex``
+        raises a ValueError:
+
+        >>> my_rna.rindex("T")
+        Traceback (most recent call last):
+                   ...
+        ValueError: ...
+        >>> my_rna.rfind("T")
+        -1
+        """
+        if isinstance(sub, MutableSeq):
+            sub = sub._data
+        elif isinstance(sub, Seq):
+            sub = bytes(sub)
+        elif isinstance(sub, str):
+            sub = sub.encode("ASCII")
+        elif not isinstance(sub, (bytes, bytearray)):
+            raise TypeError(
+                "a Seq, MutableSeq, str, bytes, or bytearray object is required, not '%s'"
+                % type(sub)
+            )
+        return self._data.rindex(sub, start, end)
+
+    def startswith(self, prefix, start=None, end=None):
+        """Return True if the sequence starts with the given prefix, False otherwise.
+
+        Return True if the sequence starts with the specified prefix
+        (a string or another Seq object), False otherwise.
+        With optional start, test sequence beginning at that position.
+        With optional end, stop comparing sequence at that position.
+        prefix can also be a tuple of strings to try.  e.g.
+
+        >>> from Bio.Seq import Seq
+        >>> my_rna = Seq("GUCAUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAGUUG")
+        >>> my_rna.startswith("GUC")
+        True
+        >>> my_rna.startswith("AUG")
+        False
+        >>> my_rna.startswith("AUG", 3)
+        True
+        >>> my_rna.startswith(("UCC", "UCA", "UCG"), 1)
+        True
+        """
+        if isinstance(prefix, tuple):
+            prefix = tuple(
+                bytes(p) if isinstance(p, _SeqAbstractBaseClass) else p.encode("ASCII")
+                for p in prefix
+            )
+        elif isinstance(prefix, _SeqAbstractBaseClass):
+            prefix = bytes(prefix)
+        elif isinstance(prefix, str):
+            prefix = prefix.encode("ASCII")
+        return self._data.startswith(prefix, start, end)
+
+    def endswith(self, suffix, start=None, end=None):
+        """Return True if the sequence ends with the given suffix, False otherwise.
+
+        Return True if the sequence ends with the specified suffix
+        (a string or another Seq object), False otherwise.
+        With optional start, test sequence beginning at that position.
+        With optional end, stop comparing sequence at that position.
+        suffix can also be a tuple of strings to try.  e.g.
+
+        >>> from Bio.Seq import Seq
+        >>> my_rna = Seq("GUCAUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAGUUG")
+        >>> my_rna.endswith("UUG")
+        True
+        >>> my_rna.endswith("AUG")
+        False
+        >>> my_rna.endswith("AUG", 0, 18)
+        True
+        >>> my_rna.endswith(("UCC", "UCA", "UUG"))
+        True
+        """
+        if isinstance(suffix, tuple):
+            suffix = tuple(
+                bytes(p) if isinstance(p, _SeqAbstractBaseClass) else p.encode("ASCII")
+                for p in suffix
+            )
+        elif isinstance(suffix, _SeqAbstractBaseClass):
+            suffix = bytes(suffix)
+        elif isinstance(suffix, str):
+            suffix = suffix.encode("ASCII")
+        return self._data.endswith(suffix, start, end)
+
+    def split(self, sep=None, maxsplit=-1):
+        """Return a list of subsequences when splitting the sequence by separator sep.
+
+        Return a list of the subsequences in the sequence (as Seq objects),
+        using sep as the delimiter string.  If maxsplit is given, at
+        most maxsplit splits are done.  If maxsplit is omitted, all
+        splits are made.
+
+        For consistency with the ``split`` method of Python strings, any
+        whitespace (tabs, spaces, newlines) is a separator if sep is None, the
+        default value.
+
+        e.g.
+
+        >>> from Bio.Seq import Seq
+        >>> my_rna = Seq("GUCAUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAGUUG")
+        >>> my_aa = my_rna.translate()
+        >>> my_aa
+        Seq('VMAIVMGR*KGAR*L')
+        >>> for pep in my_aa.split("*"):
+        ...     pep
+        Seq('VMAIVMGR')
+        Seq('KGAR')
+        Seq('L')
+        >>> for pep in my_aa.split("*", 1):
+        ...     pep
+        Seq('VMAIVMGR')
+        Seq('KGAR*L')
+
+        See also the rsplit method, which splits the sequence starting from the
+        end:
+
+        >>> for pep in my_aa.rsplit("*", 1):
+        ...     pep
+        Seq('VMAIVMGR*KGAR')
+        Seq('L')
+        """
+        if isinstance(sep, _SeqAbstractBaseClass):
+            sep = bytes(sep)
+        elif isinstance(sep, str):
+            sep = sep.encode("ASCII")
+        return [Seq(part) for part in self._data.split(sep, maxsplit)]
+
+    def rsplit(self, sep=None, maxsplit=-1):
+        """Return a list of subsequences by splitting the sequence from the right.
+
+        Return a list of the subsequences in the sequence (as Seq objects),
+        using sep as the delimiter string.  If maxsplit is given, at
+        most maxsplit splits are done.  If maxsplit is omitted, all
+        splits are made.
+
+        For consistency with the ``rsplit`` method of Python strings, any
+        whitespace (tabs, spaces, newlines) is a separator if sep is None, the
+        default value.
+
+        e.g.
+
+        >>> from Bio.Seq import Seq
+        >>> my_rna = Seq("GUCAUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAGUUG")
+        >>> my_aa = my_rna.translate()
+        >>> my_aa
+        Seq('VMAIVMGR*KGAR*L')
+        >>> for pep in my_aa.rsplit("*"):
+        ...     pep
+        Seq('VMAIVMGR')
+        Seq('KGAR')
+        Seq('L')
+        >>> for pep in my_aa.rsplit("*", 1):
+        ...     pep
+        Seq('VMAIVMGR*KGAR')
+        Seq('L')
+
+        See also the split method, which splits the sequence starting from the
+        beginning:
+
+        >>> for pep in my_aa.split("*", 1):
+        ...     pep
+        Seq('VMAIVMGR')
+        Seq('KGAR*L')
+        """
+        if isinstance(sep, _SeqAbstractBaseClass):
+            sep = bytes(sep)
+        elif isinstance(sep, str):
+            sep = sep.encode("ASCII")
+        return [Seq(part) for part in self._data.rsplit(sep, maxsplit)]
+
+    def strip(self, chars=None, inplace=False):
+        """Return a sequence object with leading and trailing ends stripped.
+
+        With default arguments, leading and trailing whitespace is removed:
+
+        >>> seq = Seq(" ACGT ")
+        >>> seq.strip()
+        Seq('ACGT')
+        >>> seq
+        Seq(' ACGT ')
+
+        If ``chars`` is given and not ``None``, remove characters in ``chars``
+        instead.  The order of the characters to be removed is not important:
+
+        >>> Seq("ACGTACGT").strip("TGCA")
+        Seq('')
+
+        A copy of the sequence is returned if ``inplace`` is ``False`` (the
+        default value).  If ``inplace`` is ``True``, the sequence is stripped
+        in-place and returned.
+
+        >>> seq = MutableSeq(" ACGT ")
+        >>> seq.strip(inplace=False)
+        MutableSeq('ACGT')
+        >>> seq
+        MutableSeq(' ACGT ')
+        >>> seq.strip(inplace=True)
+        MutableSeq('ACGT')
+        >>> seq
+        MutableSeq('ACGT')
+
+        As ``Seq`` objects are immutable, a ``TypeError`` is raised if ``strip``
+        is called on a ``Seq`` object with ``inplace=True``.
+
+        See also the lstrip and rstrip methods.
+        """
+        if isinstance(chars, _SeqAbstractBaseClass):
+            chars = bytes(chars)
+        elif isinstance(chars, str):
+            chars = chars.encode("ASCII")
+        try:
+            data = self._data.strip(chars)
+        except TypeError:
+            raise TypeError(
+                "argument must be None or a string, Seq, MutableSeq, or bytes-like object"
+            ) from None
+        if inplace:
+            if not isinstance(self._data, bytearray):
+                raise TypeError("Sequence is immutable")
+            self._data[:] = data
+            return self
+        elif isinstance(self, UnknownSeq):
+            return Seq(data)
+        else:
+            return self.__class__(data)
+
+    def lstrip(self, chars=None, inplace=False):
+        """Return a sequence object with leading and trailing ends stripped.
+
+        With default arguments, leading whitespace is removed:
+
+        >>> seq = Seq(" ACGT ")
+        >>> seq.lstrip()
+        Seq('ACGT ')
+        >>> seq
+        Seq(' ACGT ')
+
+        If ``chars`` is given and not ``None``, remove characters in ``chars``
+        from the leading end instead.  The order of the characters to be removed
+        is not important:
+
+        >>> Seq("ACGACGTTACG").lstrip("GCA")
+        Seq('TTACG')
+
+        A copy of the sequence is returned if ``inplace`` is ``False`` (the
+        default value).  If ``inplace`` is ``True``, the sequence is stripped
+        in-place and returned.
+
+        >>> seq = MutableSeq(" ACGT ")
+        >>> seq.lstrip(inplace=False)
+        MutableSeq('ACGT ')
+        >>> seq
+        MutableSeq(' ACGT ')
+        >>> seq.lstrip(inplace=True)
+        MutableSeq('ACGT ')
+        >>> seq
+        MutableSeq('ACGT ')
+
+        As ``Seq`` objects are immutable, a ``TypeError`` is raised if
+        ``lstrip`` is called on a ``Seq`` object with ``inplace=True``.
+
+        See also the strip and rstrip methods.
+        """
+        if isinstance(chars, _SeqAbstractBaseClass):
+            chars = bytes(chars)
+        elif isinstance(chars, str):
+            chars = chars.encode("ASCII")
+        try:
+            data = self._data.lstrip(chars)
+        except TypeError:
+            raise TypeError(
+                "argument must be None or a string, Seq, MutableSeq, or bytes-like object"
+            ) from None
+        if inplace:
+            if not isinstance(self._data, bytearray):
+                raise TypeError("Sequence is immutable")
+            self._data[:] = data
+            return self
+        elif isinstance(self, UnknownSeq):
+            return Seq(data)
+        else:
+            return self.__class__(data)
+
+    def rstrip(self, chars=None, inplace=False):
+        """Return a sequence object with trailing ends stripped.
+
+        With default arguments, trailing whitespace is removed:
+
+        >>> seq = Seq(" ACGT ")
+        >>> seq.rstrip()
+        Seq(' ACGT')
+        >>> seq
+        Seq(' ACGT ')
+
+        If ``chars`` is given and not ``None``, remove characters in ``chars``
+        from the trailing end instead.  The order of the characters to be
+        removed is not important:
+
+        >>> Seq("ACGACGTTACG").rstrip("GCA")
+        Seq('ACGACGTT')
+
+        A copy of the sequence is returned if ``inplace`` is ``False`` (the
+        default value).  If ``inplace`` is ``True``, the sequence is stripped
+        in-place and returned.
+
+        >>> seq = MutableSeq(" ACGT ")
+        >>> seq.rstrip(inplace=False)
+        MutableSeq(' ACGT')
+        >>> seq
+        MutableSeq(' ACGT ')
+        >>> seq.rstrip(inplace=True)
+        MutableSeq(' ACGT')
+        >>> seq
+        MutableSeq(' ACGT')
+
+        As ``Seq`` objects are immutable, a ``TypeError`` is raised if
+        ``rstrip`` is called on a ``Seq`` object with ``inplace=True``.
+
+        See also the strip and lstrip methods.
+        """
+        if isinstance(chars, _SeqAbstractBaseClass):
+            chars = bytes(chars)
+        elif isinstance(chars, str):
+            chars = chars.encode("ASCII")
+        try:
+            data = self._data.rstrip(chars)
+        except TypeError:
+            raise TypeError(
+                "argument must be None or a string, Seq, MutableSeq, or bytes-like object"
+            ) from None
+        if inplace:
+            if not isinstance(self._data, bytearray):
+                raise TypeError("Sequence is immutable")
+            self._data[:] = data
+            return self
+        elif isinstance(self, UnknownSeq):
+            return Seq(data)
+        else:
+            return self.__class__(data)
+
+    def upper(self, inplace=False):
+        """Return the sequence in upper case.
+
+        An upper-case copy of the sequence is returned if inplace is False,
+        the default value:
+
+        >>> from Bio.Seq import Seq, MutableSeq
+        >>> my_seq = Seq("VHLTPeeK*")
+        >>> my_seq
+        Seq('VHLTPeeK*')
+        >>> my_seq.lower()
+        Seq('vhltpeek*')
+        >>> my_seq.upper()
+        Seq('VHLTPEEK*')
+        >>> my_seq
+        Seq('VHLTPeeK*')
+
+        The sequence is modified in-place and returned if inplace is True:
+
+        >>> my_seq = MutableSeq("VHLTPeeK*")
+        >>> my_seq
+        MutableSeq('VHLTPeeK*')
+        >>> my_seq.lower()
+        MutableSeq('vhltpeek*')
+        >>> my_seq.upper()
+        MutableSeq('VHLTPEEK*')
+        >>> my_seq
+        MutableSeq('VHLTPeeK*')
+
+        >>> my_seq.lower(inplace=True)
+        MutableSeq('vhltpeek*')
+        >>> my_seq
+        MutableSeq('vhltpeek*')
+        >>> my_seq.upper(inplace=True)
+        MutableSeq('VHLTPEEK*')
+        >>> my_seq
+        MutableSeq('VHLTPEEK*')
+
+        As ``Seq`` objects are immutable, a ``TypeError`` is raised if
+        ``upper`` is called on a ``Seq`` object with ``inplace=True``.
+
+        See also the ``lower`` method.
+        """
+        data = self._data.upper()
+        if inplace:
+            if not isinstance(self._data, bytearray):
+                raise TypeError("Sequence is immutable")
+            self._data[:] = data
+            return self
+        else:
+            return self.__class__(data)
+
+    def lower(self, inplace=False):
+        """Return the sequence in lower case.
+
+        A lower-case copy of the sequence is returned if inplace is False,
+        the default value:
+
+        >>> from Bio.Seq import Seq, MutableSeq
+        >>> my_seq = Seq("VHLTPeeK*")
+        >>> my_seq
+        Seq('VHLTPeeK*')
+        >>> my_seq.lower()
+        Seq('vhltpeek*')
+        >>> my_seq.upper()
+        Seq('VHLTPEEK*')
+        >>> my_seq
+        Seq('VHLTPeeK*')
+
+        The sequence is modified in-place and returned if inplace is True:
+
+        >>> my_seq = MutableSeq("VHLTPeeK*")
+        >>> my_seq
+        MutableSeq('VHLTPeeK*')
+        >>> my_seq.lower()
+        MutableSeq('vhltpeek*')
+        >>> my_seq.upper()
+        MutableSeq('VHLTPEEK*')
+        >>> my_seq
+        MutableSeq('VHLTPeeK*')
+
+        >>> my_seq.lower(inplace=True)
+        MutableSeq('vhltpeek*')
+        >>> my_seq
+        MutableSeq('vhltpeek*')
+        >>> my_seq.upper(inplace=True)
+        MutableSeq('VHLTPEEK*')
+        >>> my_seq
+        MutableSeq('VHLTPEEK*')
+
+        As ``Seq`` objects are immutable, a ``TypeError`` is raised if
+        ``lower`` is called on a ``Seq`` object with ``inplace=True``.
+
+        See also the ``upper`` method.
+        """
+        data = self._data.lower()
+        if inplace:
+            if not isinstance(self._data, bytearray):
+                raise TypeError("Sequence is immutable")
+            self._data[:] = data
+            return self
+        else:
+            return self.__class__(data)
+
+    def translate(
+        self, table="Standard", stop_symbol="*", to_stop=False, cds=False, gap="-"
+    ):
+        """Turn a nucleotide sequence into a protein sequence by creating a new sequence object.
+
+        This method will translate DNA or RNA sequences. It should not
+        be used on protein sequences as any result will be biologically
+        meaningless.
+
+        Arguments:
+         - table - Which codon table to use?  This can be either a name
+           (string), an NCBI identifier (integer), or a CodonTable
+           object (useful for non-standard genetic codes).  This
+           defaults to the "Standard" table.
+         - stop_symbol - Single character string, what to use for
+           terminators.  This defaults to the asterisk, "*".
+         - to_stop - Boolean, defaults to False meaning do a full
+           translation continuing on past any stop codons (translated as the
+           specified stop_symbol).  If True, translation is terminated at
+           the first in frame stop codon (and the stop_symbol is not
+           appended to the returned protein sequence).
+         - cds - Boolean, indicates this is a complete CDS.  If True,
+           this checks the sequence starts with a valid alternative start
+           codon (which will be translated as methionine, M), that the
+           sequence length is a multiple of three, and that there is a
+           single in frame stop codon at the end (this will be excluded
+           from the protein sequence, regardless of the to_stop option).
+           If these tests fail, an exception is raised.
+         - gap - Single character string to denote symbol used for gaps.
+           Defaults to the minus sign.
+
+        A ``Seq`` object is returned if ``translate`` is called on a ``Seq``
+        object; a ``MutableSeq`` object is returned if ``translate`` is called
+        on a ``MutableSeq`` object.
+
+        e.g. Using the standard table:
+
+        >>> coding_dna = Seq("GTGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG")
+        >>> coding_dna.translate()
+        Seq('VAIVMGR*KGAR*')
+        >>> coding_dna.translate(stop_symbol="@")
+        Seq('VAIVMGR@KGAR@')
+        >>> coding_dna.translate(to_stop=True)
+        Seq('VAIVMGR')
+
+        Now using NCBI table 2, where TGA is not a stop codon:
+
+        >>> coding_dna.translate(table=2)
+        Seq('VAIVMGRWKGAR*')
+        >>> coding_dna.translate(table=2, to_stop=True)
+        Seq('VAIVMGRWKGAR')
+
+        In fact, GTG is an alternative start codon under NCBI table 2, meaning
+        this sequence could be a complete CDS:
+
+        >>> coding_dna.translate(table=2, cds=True)
+        Seq('MAIVMGRWKGAR')
+
+        It isn't a valid CDS under NCBI table 1, due to both the start codon
+        and also the in frame stop codons:
+
+        >>> coding_dna.translate(table=1, cds=True)
+        Traceback (most recent call last):
+            ...
+        Bio.Data.CodonTable.TranslationError: First codon 'GTG' is not a start codon
+
+        If the sequence has no in-frame stop codon, then the to_stop argument
+        has no effect:
+
+        >>> coding_dna2 = Seq("TTGGCCATTGTAATGGGCCGC")
+        >>> coding_dna2.translate()
+        Seq('LAIVMGR')
+        >>> coding_dna2.translate(to_stop=True)
+        Seq('LAIVMGR')
+
+        NOTE - Ambiguous codons like "TAN" or "NNN" could be an amino acid
+        or a stop codon.  These are translated as "X".  Any invalid codon
+        (e.g. "TA?" or "T-A") will throw a TranslationError.
+
+        NOTE - This does NOT behave like the python string's translate
+        method.  For that use str(my_seq).translate(...) instead
+        """
+        if isinstance(table, str) and len(table) == 256:
+            raise ValueError(
+                "The Seq and MutableSeq translate methods DO NOT "
+                "take a 256 character string mapping table like "
+                "the python string object's translate method. "
+                "Use str(my_seq).translate(...) instead."
+            )
+
+        try:
+            data = str(self)
+        except UndefinedSequenceError:
+            # translating an undefined sequence yields an undefined
+            # sequence with the length divided by 3
+            n = len(self)
+            if n % 3 != 0:
+                warnings.warn(
+                    "Partial codon, len(sequence) not a multiple of three. "
+                    "This may become an error in future.",
+                    BiopythonWarning,
+                )
+            return Seq(None, n // 3)
+
+        return self.__class__(
+            _translate_str(str(self), table, stop_symbol, to_stop, cds, gap=gap)
+        )
+
+    def complement_rna(self, inplace=False):
+        """Return the complement as an RNA sequence.
+
+        >>> Seq("CGA").complement_rna()
+        Seq('GCU')
+
+        Any T in the sequence is treated as a U:
+
+        >>> Seq("CGAUT").complement_rna()
+        Seq('GCUAA')
+
+        In contrast, ``complement`` returns a DNA sequence by default:
+
+        >>> Seq("CGA").complement()
+        Seq('GCT')
+
+        The sequence is modified in-place and returned if inplace is True:
+
+        >>> my_seq = MutableSeq("CGA")
+        >>> my_seq
+        MutableSeq('CGA')
+        >>> my_seq.complement_rna()
+        MutableSeq('GCU')
+        >>> my_seq
+        MutableSeq('CGA')
+
+        >>> my_seq.complement_rna(inplace=True)
+        MutableSeq('GCU')
+        >>> my_seq
+        MutableSeq('GCU')
+
+        As ``Seq`` objects are immutable, a ``TypeError`` is raised if
+        ``complement_rna`` is called on a ``Seq`` object with ``inplace=True``.
+        """
+        try:
+            data = self._data.translate(_rna_complement_table)
+        except UndefinedSequenceError:
+            # complement of an undefined sequence is an undefined sequence
+            # of the same length
+            return self
+        if inplace:
+            if not isinstance(self._data, bytearray):
+                raise TypeError("Sequence is immutable")
+            self._data[:] = data
+            return self
+        return self.__class__(data)
+
+    def reverse_complement_rna(self, inplace=False):
+        """Return the reverse complement as an RNA sequence.
+
+        >>> Seq("CGA").reverse_complement_rna()
+        Seq('UCG')
+
+        Any T in the sequence is treated as a U:
+
+        >>> Seq("CGAUT").reverse_complement_rna()
+        Seq('AAUCG')
+
+        In contrast, ``reverse_complement`` returns a DNA sequence by default:
+
+        >>> Seq("CGA").reverse_complement()
+        Seq('TCG')
+
+        The sequence is modified in-place and returned if inplace is True:
+
+        >>> my_seq = MutableSeq("CGA")
+        >>> my_seq
+        MutableSeq('CGA')
+        >>> my_seq.reverse_complement_rna()
+        MutableSeq('UCG')
+        >>> my_seq
+        MutableSeq('CGA')
+
+        >>> my_seq.reverse_complement_rna(inplace=True)
+        MutableSeq('UCG')
+        >>> my_seq
+        MutableSeq('UCG')
+
+        As ``Seq`` objects are immutable, a ``TypeError`` is raised if
+        ``reverse_complement_rna`` is called on a ``Seq`` object with
+        ``inplace=True``.
+        """
+        try:
+            data = self._data.translate(_rna_complement_table)
+        except UndefinedSequenceError:
+            # reverse complement of an undefined sequence is an undefined sequence
+            # of the same length
+            return self
+        if inplace:
+            if not isinstance(self._data, bytearray):
+                raise TypeError("Sequence is immutable")
+            self._data[::-1] = data
+            return self
+        return self.__class__(data[::-1])
+
+    def transcribe(self, inplace=False):
+        """Transcribe a DNA sequence into RNA and return the RNA sequence as a new Seq object.
+
+        >>> from Bio.Seq import Seq
+        >>> coding_dna = Seq("ATGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG")
+        >>> coding_dna
+        Seq('ATGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG')
+        >>> coding_dna.transcribe()
+        Seq('AUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAG')
+
+        The sequence is modified in-place and returned if inplace is True:
+
+        >>> sequence = MutableSeq("ATGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG")
+        >>> sequence
+        MutableSeq('ATGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG')
+        >>> sequence.transcribe()
+        MutableSeq('AUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAG')
+        >>> sequence
+        MutableSeq('ATGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG')
+
+        >>> sequence.transcribe(inplace=True)
+        MutableSeq('AUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAG')
+        >>> sequence
+        MutableSeq('AUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAG')
+
+        As ``Seq`` objects are immutable, a ``TypeError`` is raised if
+        ``transcribe`` is called on a ``Seq`` object with ``inplace=True``.
+
+        Trying to transcribe an RNA sequence has no effect.
+        If you have a nucleotide sequence which might be DNA or RNA
+        (or even a mixture), calling the transcribe method will ensure
+        any T becomes U.
+
+        Trying to transcribe a protein sequence will replace any
+        T for Threonine with U for Selenocysteine, which has no
+        biologically plausible rationale.
+
+        >>> from Bio.Seq import Seq
+        >>> my_protein = Seq("MAIVMGRT")
+        >>> my_protein.transcribe()
+        Seq('MAIVMGRU')
+        """
+        try:
+            data = self._data.replace(b"T", b"U").replace(b"t", b"u")
+        except UndefinedSequenceError:
+            # transcribing an undefined sequence yields an undefined sequence
+            # of the same length
+            return self
+        if inplace:
+            if not isinstance(self._data, bytearray):
+                raise TypeError("Sequence is immutable")
+            self._data[:] = data
+            return self
+        return self.__class__(data)
+
+    def back_transcribe(self, inplace=False):
+        """Return the DNA sequence from an RNA sequence by creating a new Seq object.
+
+        >>> from Bio.Seq import Seq
+        >>> messenger_rna = Seq("AUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAG")
+        >>> messenger_rna
+        Seq('AUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAG')
+        >>> messenger_rna.back_transcribe()
+        Seq('ATGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG')
+
+        The sequence is modified in-place and returned if inplace is True:
+
+        >>> sequence = MutableSeq("AUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAG")
+        >>> sequence
+        MutableSeq('AUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAG')
+        >>> sequence.back_transcribe()
+        MutableSeq('ATGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG')
+        >>> sequence
+        MutableSeq('AUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAG')
+
+        >>> sequence.back_transcribe(inplace=True)
+        MutableSeq('ATGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG')
+        >>> sequence
+        MutableSeq('ATGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG')
+
+        As ``Seq`` objects are immutable, a ``TypeError`` is raised if
+        ``back_transcribe`` is called on a ``Seq`` object with ``inplace=True``.
+
+        Trying to back-transcribe DNA has no effect. If you have a nucleotide
+        sequence which might be DNA or RNA (or even a mixture), calling the
+        back_transcribe method will ensure any U becomes T.
+
+        Trying to back-transcribe a protein sequence will replace any U (for
+        Selenocysteine) with T (for Threonine), which is biologically
+        meaningless.
+
+        >>> from Bio.Seq import Seq
+        >>> my_protein = Seq("MAIVMGRU")
+        >>> my_protein.back_transcribe()
+        Seq('MAIVMGRT')
+        """
+        try:
+            data = self._data.replace(b"U", b"T").replace(b"u", b"t")
+        except UndefinedSequenceError:
+            # back-transcribing an undefined sequence yields an undefined
+            # sequence of the same length
+            return self
+        if inplace:
+            if not isinstance(self._data, bytearray):
+                raise TypeError("Sequence is immutable")
+            self._data[:] = data
+            return self
+        return self.__class__(data)
+
+    def join(self, other):
+        """Return a merge of the sequences in other, spaced by the sequence from self.
+
+        Accepts a Seq object, MutableSeq object, or string (and iterates over
+        the letters), or an iterable containing Seq, MutableSeq, or string
+        objects. These arguments will be concatenated with the calling sequence
+        as the spacer:
+
+        >>> concatenated = Seq('NNNNN').join([Seq("AAA"), Seq("TTT"), Seq("PPP")])
+        >>> concatenated
+        Seq('AAANNNNNTTTNNNNNPPP')
+
+        Joining the letters of a single sequence:
+
+        >>> Seq('NNNNN').join(Seq("ACGT"))
+        Seq('ANNNNNCNNNNNGNNNNNT')
+        >>> Seq('NNNNN').join("ACGT")
+        Seq('ANNNNNCNNNNNGNNNNNT')
+        """
+        if isinstance(other, _SeqAbstractBaseClass):
+            return self.__class__(str(self).join(str(other)))
+        elif isinstance(other, str):
+            return self.__class__(str(self).join(other))
+
+        from Bio.SeqRecord import SeqRecord  # Lazy to avoid circular imports
+
+        if isinstance(other, SeqRecord):
+            raise TypeError("Iterable cannot be a SeqRecord")
+
+        for c in other:
+            if isinstance(c, SeqRecord):
+                raise TypeError("Iterable cannot contain SeqRecords")
+            elif not isinstance(c, (str, _SeqAbstractBaseClass)):
+                raise TypeError(
+                    "Input must be an iterable of Seq objects, MutableSeq objects, or strings"
+                )
+        return self.__class__(str(self).join([str(_) for _ in other]))
+
+    def replace(self, old, new, inplace=False):
+        """Return a copy with all occurrences of subsequence old replaced by new.
+
+        >>> s = Seq("ACGTAACCGGTT")
+        >>> t = s.replace("AC", "XYZ")
+        >>> s
+        Seq('ACGTAACCGGTT')
+        >>> t
+        Seq('XYZGTAXYZCGGTT')
+
+        For mutable sequences, passing inplace=True will modify the sequence in place:
+
+        >>> m = MutableSeq("ACGTAACCGGTT")
+        >>> t = m.replace("AC", "XYZ")
+        >>> m
+        MutableSeq('ACGTAACCGGTT')
+        >>> t
+        MutableSeq('XYZGTAXYZCGGTT')
+
+        >>> m = MutableSeq("ACGTAACCGGTT")
+        >>> t = m.replace("AC", "XYZ", inplace=True)
+        >>> m
+        MutableSeq('XYZGTAXYZCGGTT')
+        >>> t
+        MutableSeq('XYZGTAXYZCGGTT')
+
+        As ``Seq`` objects are immutable, a ``TypeError`` is raised if
+        ``replace`` is called on a ``Seq`` object with ``inplace=True``.
+        """
+        if isinstance(old, _SeqAbstractBaseClass):
+            old = bytes(old)
+        elif isinstance(old, str):
+            old = old.encode("ASCII")
+        if isinstance(new, _SeqAbstractBaseClass):
+            new = bytes(new)
+        elif isinstance(new, str):
+            new = new.encode("ASCII")
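+        # By this point both old and new are plain bytes, matching the
+        # bytes-like storage behind self._data.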
+        data = self._data.replace(old, new)
+        if inplace:
+            if not isinstance(self._data, bytearray):
+                raise TypeError("Sequence is immutable")
+            self._data[:] = data
+            return self
+        return self.__class__(data)
+
+
+class Seq(_SeqAbstractBaseClass):
+    """Read-only sequence object (essentially a string with biological methods).
+
+    Like normal python strings, our basic sequence object is immutable.
+    This prevents you from doing my_seq[5] = "A" for example, but does allow
+    Seq objects to be used as dictionary keys.
+
+    The Seq object provides a number of string like methods (such as count,
+    find, split and strip).
+
+    The Seq object also provides some biological methods, such as complement,
+    reverse_complement, transcribe, back_transcribe and translate (which are
+    not applicable to protein sequences).
+    """
+
+    def __init__(self, data, length=None):
+        """Create a Seq object.
+
+        Arguments:
+         - data - Sequence, required (string, bytes, Seq, or MutableSeq)
+         - length - Sequence length, used only if data is None (integer)
+
+        You will typically use Bio.SeqIO to read in sequences from files as
+        SeqRecord objects, whose sequence will be exposed as a Seq object via
+        the seq property.
+
+        However, you can also create a Seq object directly:
+
+        >>> from Bio.Seq import Seq
+        >>> my_seq = Seq("MKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF")
+        >>> my_seq
+        Seq('MKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF')
+        >>> print(my_seq)
+        MKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF
+
+        To create a Seq object for a sequence of known length but
+        unknown sequence contents, use None for the data argument and pass
+        the sequence length for the length argument. Trying to access the
+        sequence contents of a Seq object created in this way will raise
+        an UndefinedSequenceError:
+
+        >>> my_undefined_seq = Seq(None, 20)
+        >>> my_undefined_seq
+        Seq(None, length=20)
+        >>> len(my_undefined_seq)
+        20
+        >>> print(my_undefined_seq)
+        Traceback (most recent call last):
+        ...
+        Bio.Seq.UndefinedSequenceError: Sequence content is undefined
+        """
+        if length is None:
+            if isinstance(data, (bytes, SequenceDataAbstractBaseClass)):
+                self._data = data
+            elif isinstance(data, (bytearray, _SeqAbstractBaseClass)):
+                self._data = bytes(data)
+            elif isinstance(data, str):
+                self._data = bytes(data, encoding="ASCII")
+            else:
+                raise TypeError(
+                    "data should be a string, bytes, bytearray, Seq, or MutableSeq object"
+                )
+        else:
+            if data is not None:
+                raise ValueError("length may only be given if data is None")
+            self._data = _UndefinedSequenceData(length)
+
+    def __hash__(self):
+        """Hash of the sequence as a string for comparison.
+
+        See Seq object comparison documentation (method ``__eq__`` in
+        particular) as this has changed in Biopython 1.65. Older versions
+        would hash on object identity.
+        """
+        return hash(self._data)
+
+    def tomutable(self):
+        """Return the full sequence as a MutableSeq object.
+
+        >>> from Bio.Seq import Seq
+        >>> my_seq = Seq("MKQHKAMIVALIVICITAVVAAL")
+        >>> my_seq
+        Seq('MKQHKAMIVALIVICITAVVAAL')
+        >>> my_seq.tomutable()
+        MutableSeq('MKQHKAMIVALIVICITAVVAAL')
+        """
+        warnings.warn(
+            "myseq.tomutable() is deprecated; please use MutableSeq(myseq) instead.",
+            BiopythonDeprecationWarning,
+        )
+        return MutableSeq(self)
+
+    def encode(self, encoding="utf-8", errors="strict"):
+        """Return an encoded version of the sequence as a bytes object.
+
+        The Seq object aims to match the interface of a Python string.
+
+        This is essentially to save you doing str(my_seq).encode() when
+        you need a bytes string, for example for computing a hash:
+
+        >>> from Bio.Seq import Seq
+        >>> Seq("ACGT").encode("ascii")
+        b'ACGT'
+        """
+        warnings.warn(
+            "myseq.encode has been deprecated; please use bytes(myseq) instead.",
+            BiopythonDeprecationWarning,
+        )
+        return str(self).encode(encoding, errors)
+
+    def complement(self):
+        """Return the complement sequence by creating a new Seq object.
+
+        This method is intended for use with DNA sequences:
+
+        >>> from Bio.Seq import Seq
+        >>> my_dna = Seq("CCCCCGATAG")
+        >>> my_dna
+        Seq('CCCCCGATAG')
+        >>> my_dna.complement()
+        Seq('GGGGGCTATC')
+
+        You can of course use mixed case sequences,
+
+        >>> from Bio.Seq import Seq
+        >>> my_dna = Seq("CCCCCgatA-GD")
+        >>> my_dna
+        Seq('CCCCCgatA-GD')
+        >>> my_dna.complement()
+        Seq('GGGGGctaT-CH')
+
+        Note in the above example, ambiguous character D denotes
+        G, A or T so its complement is H (for C, T or A).
+
+        Note that if the sequence contains neither T nor U, we
+        assume it is DNA and map any A character to T:
+
+        >>> Seq("CGA").complement()
+        Seq('GCT')
+        >>> Seq("CGAT").complement()
+        Seq('GCTA')
+
+        If you actually have RNA, this currently works but we
+        may deprecate this later. We recommend using the new
+        complement_rna method instead:
+
+        >>> Seq("CGAU").complement()
+        Seq('GCUA')
+        >>> Seq("CGAU").complement_rna()
+        Seq('GCUA')
+
+        If the sequence contains both T and U, an exception is
+        raised:
+
+        >>> Seq("CGAUT").complement()
+        Traceback (most recent call last):
+           ...
+        ValueError: Mixed RNA/DNA found
+
+        Trying to complement a protein sequence gives a meaningless
+        sequence:
+
+        >>> my_protein = Seq("MAIVMGR")
+        >>> my_protein.complement()
+        Seq('KTIBKCY')
+
+        Here "M" was interpreted as the IUPAC ambiguity code for
+        "A" or "C", with complement "K" for "T" or "G". Likewise
+        "A" has complement "T". The letter "I" has no defined
+        meaning under the IUPAC convention, and is unchanged.
+        """
+        if isinstance(self._data, _UndefinedSequenceData):
+            # complement of an undefined sequence is an undefined sequence
+            # of the same length
+            return self
+        if (b"U" in self._data or b"u" in self._data) and (
+            b"T" in self._data or b"t" in self._data
+        ):
+            # TODO - Handle this cleanly?
+            raise ValueError("Mixed RNA/DNA found")
+        elif b"U" in self._data or b"u" in self._data:
+            ttable = _rna_complement_table
+        else:
+            ttable = _dna_complement_table
+        # Much faster on really long sequences than the previous loop based
+        # one. Thanks to Michael Palmer, University of Waterloo.
+        return Seq(self._data.translate(ttable))
+
+    def reverse_complement(self):
+        """Return the reverse complement sequence by creating a new Seq object.
+
+        This method is intended for use with DNA sequences:
+
+        >>> from Bio.Seq import Seq
+        >>> my_dna = Seq("CCCCCGATAGNR")
+        >>> my_dna
+        Seq('CCCCCGATAGNR')
+        >>> my_dna.reverse_complement()
+        Seq('YNCTATCGGGGG')
+
+        Note in the above example, since R = G or A, its complement
+        is Y (which denotes C or T).
+
+        You can of course use mixed case sequences,
+
+        >>> from Bio.Seq import Seq
+        >>> my_dna = Seq("CCCCCgatA-G")
+        >>> my_dna
+        Seq('CCCCCgatA-G')
+        >>> my_dna.reverse_complement()
+        Seq('C-TatcGGGGG')
+
+        As discussed for the complement method, if the sequence
+        contains neither T nor U, it is assumed to be DNA and
+        any letter A will be mapped to T.
+
+        If you are dealing with RNA, you should use the new
+        reverse_complement_rna method instead:
+
+        >>> Seq("CGA").reverse_complement()  # defaults to DNA
+        Seq('TCG')
+        >>> Seq("CGA").reverse_complement_rna()
+        Seq('UCG')
+
+        If the sequence contains both T and U, an exception is raised:
+
+        >>> Seq("CGAUT").reverse_complement()
+        Traceback (most recent call last):
+           ...
+        ValueError: Mixed RNA/DNA found
+
+        Trying to reverse complement a protein sequence will give
+        a meaningless sequence:
+
+        >>> from Bio.Seq import Seq
+        >>> my_protein = Seq("MAIVMGR")
+        >>> my_protein.reverse_complement()
+        Seq('YCKBITK')
+
+        Here "M" was interpreted as the IUPAC ambiguity code for
+        "A" or "C", with complement "K" for "T" or "G" - and so on.
+        """
+        # Use -1 stride/step to reverse the complement
+        return self.complement()[::-1]
+
+    def ungap(self, gap="-"):
+        """Return a copy of the sequence without the gap character(s) (OBSOLETE).
+
+        The gap character now defaults to the minus sign, and can only
+        be specified via the method argument. This is no longer possible
+        via the sequence's alphabet (as was possible up to Biopython 1.77):
+
+        >>> from Bio.Seq import Seq
+        >>> my_dna = Seq("-ATA--TGAAAT-TTGAAAA")
+        >>> my_dna
+        Seq('-ATA--TGAAAT-TTGAAAA')
+        >>> my_dna.ungap("-")
+        Seq('ATATGAAATTTGAAAA')
+
+        This method is OBSOLETE; please use my_dna.replace(gap, "") instead.
+        """
+        if not gap:
+            raise ValueError("Gap character required.")
+        elif not isinstance(gap, str) or len(gap) != 1:
+            raise ValueError(f"Unexpected gap character, {gap!r}")
+        return self.replace(gap, b"")
+
+
+class UnknownSeq(Seq):
+    """Read-only sequence object of known length but unknown contents (DEPRECATED).
+
+    If you have an unknown sequence, you can represent this with a normal
+    Seq object, for example:
+
+    >>> my_seq = Seq("N"*5)
+    >>> my_seq
+    Seq('NNNNN')
+    >>> len(my_seq)
+    5
+    >>> print(my_seq)
+    NNNNN
+
+    However, this is rather wasteful of memory (especially for large
+    sequences), which is where this class is most useful:
+
+    >>> unk_five = UnknownSeq(5)
+    >>> unk_five
+    UnknownSeq(5, character='?')
+    >>> len(unk_five)
+    5
+    >>> print(unk_five)
+    ?????
+
+    You can add unknown sequences together. Provided the characters are the
+    same, you get another memory saving UnknownSeq:
+
+    >>> unk_four = UnknownSeq(4)
+    >>> unk_four
+    UnknownSeq(4, character='?')
+    >>> unk_four + unk_five
+    UnknownSeq(9, character='?')
+
+    If the characters are different, addition gives an ordinary Seq object:
+
+    >>> unk_nnnn = UnknownSeq(4, character="N")
+    >>> unk_nnnn
+    UnknownSeq(4, character='N')
+    >>> unk_nnnn + unk_four
+    Seq('NNNN????')
+
+    Combining with a real Seq gives a new Seq object:
+
+    >>> known_seq = Seq("ACGT")
+    >>> unk_four + known_seq
+    Seq('????ACGT')
+    >>> known_seq + unk_four
+    Seq('ACGT????')
+
+    Although originally intended for unknown sequences (thus the class name),
+    this can be used for homopolymer sequences like AAAAAA, and the biological
+    methods will respect this:
+
+    >>> homopolymer = UnknownSeq(6, character="A")
+    >>> homopolymer.complement()
+    UnknownSeq(6, character='T')
+    >>> homopolymer.complement_rna()
+    UnknownSeq(6, character='U')
+    >>> homopolymer.translate()
+    UnknownSeq(2, character='K')
+    """
+
+    def __init__(self, length, alphabet=None, character="?"):
+        """Create a new UnknownSeq object.
+
+        Arguments:
+         - length - Integer, required.
+         - alphabet - no longer used, must be None.
+         - character - single letter string, default "?". Typically "N"
+           for nucleotides, "X" for proteins, and "?" otherwise.
+        """
+        warnings.warn(
+            "UnknownSeq(length) is deprecated; please use Seq(None, length) instead.",
+            BiopythonDeprecationWarning,
+        )
+        if alphabet is not None:
+            raise ValueError("The alphabet argument is no longer supported")
+        self._length = int(length)
+        if self._length < 0:
+            # TODO - Block zero length UnknownSeq?  You can just use a Seq!
+            raise ValueError("Length must not be negative.")
+        if not character or len(character) != 1:
+            raise ValueError("character argument should be a single letter string.")
+        self._character = character
+
+    def __len__(self):
+        """Return the stated length of the unknown sequence."""
+        return self._length
+
+    def __bytes__(self):
+        """Return the unknown sequence as full string of the given length."""
+        return self._character.encode("ASCII") * self._length
+
+    @property
+    def _data(self):
+        return self._character.encode("ASCII") * self._length
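+    # Exposing _data as bytes generated on demand lets the methods inherited
+    # from the Seq base classes operate on UnknownSeq by materialising the
+    # repeated character; memory is only used when such a method is called.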
+
+    def __str__(self):
+        """Return the unknown sequence as full string of the given length."""
+        return self._character * self._length
+
+    def __repr__(self):
+        """Return (truncated) representation of the sequence for debugging."""
+        return f"UnknownSeq({self._length}, character={self._character!r})"
+
+    def __add__(self, other):
+        """Add another sequence or string to this sequence.
+
+        Adding two UnknownSeq objects returns another UnknownSeq object
+        provided the character is the same.
+
+        >>> from Bio.Seq import UnknownSeq
+        >>> UnknownSeq(10, character='X') + UnknownSeq(5, character='X')
+        UnknownSeq(15, character='X')
+
+        If the characters differ, an UnknownSeq object cannot be used, so a
+        Seq object is returned:
+
+        >>> from Bio.Seq import UnknownSeq
+        >>> UnknownSeq(10, character='X') + UnknownSeq(5, character="x")
+        Seq('XXXXXXXXXXxxxxx')
+
+        If adding a string to an UnknownSeq, a new Seq is returned:
+
+        >>> from Bio.Seq import UnknownSeq
+        >>> UnknownSeq(5, character='X') + "LV"
+        Seq('XXXXXLV')
+        """
+        if isinstance(other, UnknownSeq) and other._character == self._character:
+            return UnknownSeq(len(self) + len(other), character=self._character)
+        # Offload to the base class...
+        return Seq(bytes(self)) + other
+
+    def __radd__(self, other):
+        """Add a sequence on the left."""
+        # If other is an UnknownSeq, then __add__ would be called.
+        # Offload to the base class...
+        return other + Seq(bytes(self))
+
+    def __mul__(self, other):
+        """Multiply UnknownSeq by integer.
+
+        >>> from Bio.Seq import UnknownSeq
+        >>> UnknownSeq(3) * 2
+        UnknownSeq(6, character='?')
+        >>> UnknownSeq(3, character="N") * 2
+        UnknownSeq(6, character='N')
+        """
+        if not isinstance(other, int):
+            raise TypeError(f"can't multiply {self.__class__.__name__} by non-int type")
+        return self.__class__(len(self) * other, character=self._character)
+
+    def __rmul__(self, other):
+        """Multiply integer by UnknownSeq.
+
+        >>> from Bio.Seq import UnknownSeq
+        >>> 2 * UnknownSeq(3)
+        UnknownSeq(6, character='?')
+        >>> 2 * UnknownSeq(3, character="N")
+        UnknownSeq(6, character='N')
+        """
+        if not isinstance(other, int):
+            raise TypeError(f"can't multiply {self.__class__.__name__} by non-int type")
+        return self.__class__(len(self) * other, character=self._character)
+
+    def __imul__(self, other):
+        """Multiply UnknownSeq in-place.
+
+        >>> from Bio.Seq import UnknownSeq
+        >>> seq = UnknownSeq(3, character="N")
+        >>> seq *= 2
+        >>> seq
+        UnknownSeq(6, character='N')
+        """
+        if not isinstance(other, int):
+            raise TypeError(f"can't multiply {self.__class__.__name__} by non-int type")
+        return self.__class__(len(self) * other, character=self._character)
+
+    def __getitem__(self, index):
+        """Get a subsequence from the UnknownSeq object.
+
+        >>> unk = UnknownSeq(8, character="N")
+        >>> print(unk[:])
+        NNNNNNNN
+        >>> print(unk[5:3])
+        <BLANKLINE>
+        >>> print(unk[1:-1])
+        NNNNNN
+        >>> print(unk[1:-1:2])
+        NNN
+        """
+        if isinstance(index, int):
+            if index >= -self._length and index < self._length:
+                return self._character
+            raise IndexError("sequence index out of range")
+        start, stop, stride = index.indices(self._length)
+        length = len(range(start, stop, stride))
+        return UnknownSeq(length, character=self._character)
+
+    def count(self, sub, start=None, end=None):
+        """Return a non-overlapping count, like that of a python string.
+
+        This behaves like the python string (and Seq object) method of the
+        same name, which does a non-overlapping count!
+
+        For an overlapping search use the newer count_overlap() method.
+
+        Returns an integer, the number of occurrences of substring
+        argument sub in the (sub)sequence given by [start:end].
+        Optional arguments start and end are interpreted as in slice
+        notation.
+
+        Arguments:
+         - sub - a string or another Seq object to look for
+         - start - optional integer, slice start
+         - end - optional integer, slice end
+
+        >>> "NNNN".count("N")
+        4
+        >>> Seq("NNNN").count("N")
+        4
+        >>> UnknownSeq(4, character="N").count("N")
+        4
+        >>> UnknownSeq(4, character="N").count("A")
+        0
+        >>> UnknownSeq(4, character="N").count("AA")
+        0
+
+        HOWEVER, please note that because python strings and Seq objects (and
+        MutableSeq objects) do a non-overlapping search, this may not give
+        the answer you expect:
+
+        >>> UnknownSeq(4, character="N").count("NN")
+        2
+        >>> UnknownSeq(4, character="N").count("NNN")
+        1
+        """
+        if isinstance(sub, _SeqAbstractBaseClass):
+            sub = str(sub)
+        elif not isinstance(sub, str):
+            raise TypeError(
+                "a Seq, MutableSeq, or string object is required, not '%s'" % type(sub)
+            )
+        # Handling case where subsequence not in self
+        if set(sub) != set(self._character):
+            return 0
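+        # Non-overlapping count: step through the region in jumps of len(sub),
+        # so each match consumes its own characters.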
+        start, stop, stride = slice(start, end, len(sub)).indices(self._length)
+        return len(range(start, stop - len(sub) + 1, stride))
+
+    def count_overlap(self, sub, start=None, end=None):
+        """Return an overlapping count.
+
+        For a non-overlapping search use the count() method.
+
+        Returns an integer, the number of occurrences of substring
+        argument sub in the (sub)sequence given by [start:end].
+        Optional arguments start and end are interpreted as in slice
+        notation.
+
+        Arguments:
+         - sub - a string or another Seq object to look for
+         - start - optional integer, slice start
+         - end - optional integer, slice end
+
+        e.g.
+
+        >>> from Bio.Seq import UnknownSeq
+        >>> UnknownSeq(4, character="N").count_overlap("NN")
+        3
+        >>> UnknownSeq(4, character="N").count_overlap("NNN")
+        2
+
+        Where substrings do not overlap, this should behave the same as
+        the count() method:
+
+        >>> UnknownSeq(4, character="N").count_overlap("N")
+        4
+        >>> UnknownSeq(4, character="N").count_overlap("N") == UnknownSeq(4, character="N").count("N")
+        True
+        >>> UnknownSeq(4, character="N").count_overlap("A")
+        0
+        >>> UnknownSeq(4, character="N").count_overlap("A") == UnknownSeq(4, character="N").count("A")
+        True
+        >>> UnknownSeq(4, character="N").count_overlap("AA")
+        0
+        >>> UnknownSeq(4, character="N").count_overlap("AA") == UnknownSeq(4, character="N").count("AA")
+        True
+        """
+        if isinstance(sub, _SeqAbstractBaseClass):
+            sub = str(sub)
+        elif not isinstance(sub, str):
+            raise TypeError(
+                "a Seq, MutableSeq, or string object is required, not '%s'" % type(sub)
+            )
+        # Handling case where subsequence not in self
+        if set(sub) != set(self._character):
+            return 0
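+        # Overlapping count: a stride of one lets successive matches share
+        # characters, unlike the count() method above.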
+        start, stop, stride = slice(start, end).indices(self._length)
+        return len(range(start, stop - len(sub) + 1, stride))
+
+    def complement(self):
+        """Return the complement assuming it is DNA.
+
+        In typical usage this will return the same unknown sequence:
+
+        >>> my_nuc = UnknownSeq(8, character='N')
+        >>> my_nuc
+        UnknownSeq(8, character='N')
+        >>> print(my_nuc)
+        NNNNNNNN
+        >>> my_nuc.complement()
+        UnknownSeq(8, character='N')
+        >>> print(my_nuc.complement())
+        NNNNNNNN
+
+        If your sequence isn't actually unknown, and has a nucleotide letter
+        other than N, the appropriate DNA complement base is used:
+
+        >>> UnknownSeq(8, character="A").complement()
+        UnknownSeq(8, character='T')
+        """
+        s = complement(self._character)
+        return UnknownSeq(self._length, character=s)
+
+    def complement_rna(self):
+        """Return the complement assuming it is RNA.
+
+        In typical usage this will return the same unknown sequence. If your
+        sequence isn't actually unknown, the appropriate RNA complement base
+        is used:
+
+        >>> UnknownSeq(8, character="A").complement_rna()
+        UnknownSeq(8, character='U')
+        """
+        s = complement_rna(self._character)
+        return UnknownSeq(self._length, character=s)
+
+    def reverse_complement(self):
+        """Return the reverse complement assuming it is DNA.
+
+        In typical usage this will return the same unknown sequence:
+
+        >>> from Bio.Seq import UnknownSeq
+        >>> example = UnknownSeq(6, character="N")
+        >>> print(example)
+        NNNNNN
+        >>> print(example.reverse_complement())
+        NNNNNN
+
+        If your sequence isn't actually unknown, the appropriate DNA
+        complement base is used:
+
+        >>> UnknownSeq(8, character="A").reverse_complement()
+        UnknownSeq(8, character='T')
+        """
+        return self.complement()
+
+    def reverse_complement_rna(self):
+        """Return the reverse complement assuming it is RNA.
+
+        In typical usage this will return the same unknown sequence. If your
+        sequence isn't actually unknown, the appropriate RNA complement base
+        is used:
+
+        >>> UnknownSeq(8, character="A").reverse_complement_rna()
+        UnknownSeq(8, character='U')
+        """
+        return self.complement_rna()
+
+    def transcribe(self):
+        """Return an unknown RNA sequence from an unknown DNA sequence.
+
+        >>> my_dna = UnknownSeq(10, character="N")
+        >>> my_dna
+        UnknownSeq(10, character='N')
+        >>> print(my_dna)
+        NNNNNNNNNN
+        >>> my_rna = my_dna.transcribe()
+        >>> my_rna
+        UnknownSeq(10, character='N')
+        >>> print(my_rna)
+        NNNNNNNNNN
+
+        In typical usage this will return the same unknown sequence. If your
+        sequence isn't actually unknown, but a homopolymer of T, the standard
+        DNA to RNA transcription is done, replacing T with U:
+
+        >>> UnknownSeq(9, character="t").transcribe()
+        UnknownSeq(9, character='u')
+        """
+        s = transcribe(self._character)
+        return UnknownSeq(self._length, character=s)
+
+    def back_transcribe(self):
+        """Return an unknown DNA sequence from an unknown RNA sequence.
+
+        >>> my_rna = UnknownSeq(20, character="N")
+        >>> my_rna
+        UnknownSeq(20, character='N')
+        >>> print(my_rna)
+        NNNNNNNNNNNNNNNNNNNN
+        >>> my_dna = my_rna.back_transcribe()
+        >>> my_dna
+        UnknownSeq(20, character='N')
+        >>> print(my_dna)
+        NNNNNNNNNNNNNNNNNNNN
+
+        In typical usage this will return the same unknown sequence. If your
+        sequence is actually a U homopolymer, the standard RNA to DNA
+        back-transcription applies, replacing U with T:
+
+        >>> UnknownSeq(9, character="U").back_transcribe()
+        UnknownSeq(9, character='T')
+        """
+        s = back_transcribe(self._character)
+        return UnknownSeq(self._length, character=s)
+
+    def upper(self):
+        """Return an upper case copy of the sequence.
+
+        >>> from Bio.Seq import UnknownSeq
+        >>> my_seq = UnknownSeq(20, character="n")
+        >>> my_seq
+        UnknownSeq(20, character='n')
+        >>> print(my_seq)
+        nnnnnnnnnnnnnnnnnnnn
+        >>> my_seq.upper()
+        UnknownSeq(20, character='N')
+        >>> print(my_seq.upper())
+        NNNNNNNNNNNNNNNNNNNN
+
+        See also the lower method.
+        """
+        return UnknownSeq(self._length, character=self._character.upper())
+
+    def lower(self):
+        """Return a lower case copy of the sequence.
+
+        >>> from Bio.Seq import UnknownSeq
+        >>> my_seq = UnknownSeq(20, character="X")
+        >>> my_seq
+        UnknownSeq(20, character='X')
+        >>> print(my_seq)
+        XXXXXXXXXXXXXXXXXXXX
+        >>> my_seq.lower()
+        UnknownSeq(20, character='x')
+        >>> print(my_seq.lower())
+        xxxxxxxxxxxxxxxxxxxx
+
+        See also the upper method.
+        """
+        return UnknownSeq(self._length, character=self._character.lower())
+
+    def translate(
+        self, table="Standard", stop_symbol="*", to_stop=False, cds=False, gap="-"
+    ):
+        """Translate an unknown nucleotide sequence into an unknown protein.
+
+        If your sequence makes sense as codons (e.g. a poly-A tail AAAAAA),
+        it will be translated accordingly:
+
+        >>> UnknownSeq(7, character='A').translate()
+        UnknownSeq(2, character='K')
+
+        Otherwise, it will be translated as X for unknown amino acid:
+
+        >>> UnknownSeq(7).translate()
+        UnknownSeq(2, character='X')
+        """
+        try:
+            s = translate(
+                self._character * 3,
+                table=table,
+                stop_symbol=stop_symbol,
+                to_stop=to_stop,
+                cds=cds,
+                gap=gap,
+            )
+        except CodonTable.TranslationError:
+            # Preserve historic behaviour, ??? (default character) and XXX -> X
+            s = "X"
+        # Don't worry about to_stop - no known stop codon is three bases the same.
+        return UnknownSeq(self._length // 3, character=s)
+
+    def ungap(self, gap="-"):
+        """Return a copy of the sequence without the gap character(s).
+
+        The gap character now defaults to the minus sign, and can only
+        be specified via the method argument. This is no longer possible
+        via the sequence's alphabet (as was possible up to Biopython 1.77):
+
+        >>> from Bio.Seq import UnknownSeq
+        >>> my_dna = UnknownSeq(20, character='N')
+        >>> my_dna
+        UnknownSeq(20, character='N')
+        >>> my_dna.ungap()  # using default
+        UnknownSeq(20, character='N')
+        >>> my_dna.ungap("-")
+        UnknownSeq(20, character='N')
+
+        If the UnknownSeq is using the gap character, then an empty Seq is
+        returned:
+
+        >>> my_gap = UnknownSeq(20, character="-")
+        >>> my_gap
+        UnknownSeq(20, character='-')
+        >>> my_gap.ungap()  # using default
+        Seq('')
+        >>> my_gap.ungap("-")
+        Seq('')
+        """
+        if self._character == gap:
+            return Seq("")
+        else:
+            return UnknownSeq(self._length, character=self._character)
+
+    def join(self, other):
+        """Return a merge of the sequences in other, spaced by the sequence from self.
+
+        Accepts either a Seq or string (and iterates over the letters), or an
+        iterable containing Seq or string objects. These arguments will be
+        concatenated with the calling sequence as the spacer:
+
+        >>> concatenated = UnknownSeq(5).join([Seq("AAA"), Seq("TTT"), Seq("PPP")])
+        >>> concatenated
+        Seq('AAA?????TTT?????PPP')
+
+        If all the inputs are also UnknownSeq using the same character, then it
+        returns a new UnknownSeq:
+
+        >>> UnknownSeq(5).join([UnknownSeq(3), UnknownSeq(3), UnknownSeq(3)])
+        UnknownSeq(19, character='?')
+
+        Examples taking a single sequence and joining the letters:
+
+        >>> UnknownSeq(3).join("ACGT")
+        Seq('A???C???G???T')
+        >>> UnknownSeq(3).join(UnknownSeq(4))
+        UnknownSeq(13, character='?')
+
+        Will only return an UnknownSeq object if all of the objects to be joined are
+        also UnknownSeqs with the same character as the spacer, similar to how the
+        addition of an UnknownSeq and another UnknownSeq would work.
+        """
+        from Bio.SeqRecord import SeqRecord  # Lazy to avoid circular imports
+
+        if isinstance(other, (str, _SeqAbstractBaseClass)):
+            if isinstance(other, UnknownSeq) and self._character == other._character:
+                # Special case, can return an UnknownSeq
+                return self.__class__(
+                    len(other) + len(self) * (len(other) - 1), character=self._character
+                )
+            return Seq(str(self).join(str(other)))
+        if isinstance(other, SeqRecord):
+            raise TypeError("Iterable cannot be a SeqRecord")
+
+        for c in other:
+            if isinstance(c, SeqRecord):
+                raise TypeError("Iterable cannot contain SeqRecords")
+            elif not isinstance(c, (str, _SeqAbstractBaseClass)):
+                raise TypeError("Input must be an iterable of Seqs or Strings")
+        temp_data = str(self).join([str(_) for _ in other])
+        if temp_data.count(self._character) == len(temp_data):
+            # Can return an UnknownSeq
+            return self.__class__(len(temp_data), character=self._character)
+        return Seq(temp_data)
+
+
+class MutableSeq(_SeqAbstractBaseClass):
+    """An editable sequence object.
+
+    Unlike normal python strings and our basic sequence object (the Seq class)
+    which are immutable, the MutableSeq lets you edit the sequence in place.
+    However, this means you cannot use a MutableSeq object as a dictionary key.
+
+    >>> from Bio.Seq import MutableSeq
+    >>> my_seq = MutableSeq("ACTCGTCGTCG")
+    >>> my_seq
+    MutableSeq('ACTCGTCGTCG')
+    >>> my_seq[5]
+    'T'
+    >>> my_seq[5] = "A"
+    >>> my_seq
+    MutableSeq('ACTCGACGTCG')
+    >>> my_seq[5]
+    'A'
+    >>> my_seq[5:8] = "NNN"
+    >>> my_seq
+    MutableSeq('ACTCGNNNTCG')
+    >>> len(my_seq)
+    11
+
+    Note that the MutableSeq object does not support as many string-like
+    or biological methods as the Seq object.
+    """
+
+    def __init__(self, data):
+        """Create a MutableSeq object."""
+        if isinstance(data, array.array):
+            if data.typecode != "u":
+                raise ValueError(
+                    "data should be a string, array of characters, Seq object, "
+                    "or MutableSeq object"
+                )
+            warnings.warn(
+                "Initializing a MutableSeq by an array has been deprecated; please "
+                "use a bytearray object instead.",
+                BiopythonDeprecationWarning,
+            )
+            data = data.tounicode()
+        if isinstance(data, bytearray):
+            self._data = data
+        elif isinstance(data, bytes):
+            self._data = bytearray(data)
+        elif isinstance(data, str):
+            self._data = bytearray(data, "ASCII")
+        elif isinstance(data, MutableSeq):
+            self._data = data._data[:]  # Take a copy
+        elif isinstance(data, Seq):
+            # Make no assumptions about the Seq subclass internal storage
+            self._data = bytearray(bytes(data))
+        else:
+            raise TypeError(
+                "data should be a string, bytearray object, Seq object, or a "
+                "MutableSeq object"
+            )
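+
+    # Illustrative constructor inputs (a sketch mirroring the isinstance
+    # checks above):
+    #     MutableSeq("ACGT"); MutableSeq(bytearray(b"ACGT"))
+    #     MutableSeq(Seq("ACGT")); MutableSeq(MutableSeq("ACGT"))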
+
+    @property
+    def data(self):
+        """Get the data."""
+        warnings.warn(
+            "Accessing MutableSeq.data has been deprecated, as it is now a private "
+            "attribute. Please use indexing to access the sequence contents of "
+            "a MutableSeq object.",
+            BiopythonDeprecationWarning,
+        )
+        return array.array("u", self._data.decode("ASCII"))
+
+    @data.setter
+    def data(self, value):
+        """Set the data."""
+        warnings.warn(
+            "Accessing MutableSeq.data has been deprecated, as it is now a private "
+            "attribute. Please use indexing to access the sequence contents of "
+            "a MutableSeq object.",
+            BiopythonDeprecationWarning,
+        )
+        self.__init__(value)
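+
+    # Preferred modern access (a sketch): index, slice, or convert directly,
+    # e.g. my_seq[0], my_seq[2:5], or str(my_seq), rather than using the
+    # deprecated .data array.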
+
+    def __setitem__(self, index, value):
+        """Set a subsequence of single letter via value parameter.
+
+        >>> my_seq = MutableSeq('ACTCGACGTCG')
+        >>> my_seq[0] = 'T'
+        >>> my_seq
+        MutableSeq('TCTCGACGTCG')
+        """
+        if isinstance(index, int):
+            # Replacing a single letter with a new string
+            self._data[index] = ord(value)
+        else:
+            # Replacing a sub-sequence
+            if isinstance(value, MutableSeq):
+                self._data[index] = value._data
+            elif isinstance(value, Seq):
+                self._data[index] = bytes(value)
+            elif isinstance(value, str):
+                self._data[index] = value.encode("ASCII")
+            else:
+                raise TypeError("received unexpected type %s" % type(value))
+
+    def __delitem__(self, index):
+        """Delete a subsequence of single letter.
+
+        >>> my_seq = MutableSeq('ACTCGACGTCG')
+        >>> del my_seq[0]
+        >>> my_seq
+        MutableSeq('CTCGACGTCG')
+        """
+        # Could be deleting a single letter, or a slice
+        del self._data[index]
+
+    def append(self, c):
+        """Append a single letter to the mutable sequence object.
+
+        >>> my_seq = MutableSeq('ACTCGACGTCG')
+        >>> my_seq.append('A')
+        >>> my_seq
+        MutableSeq('ACTCGACGTCGA')
+
+        No return value.
+        """
+        self._data.append(ord(c.encode("ASCII")))
+
+    def insert(self, i, c):
+        """Insert a single letter into the mutable sequence at a given index.
+
+        >>> my_seq = MutableSeq('ACTCGACGTCG')
+        >>> my_seq.insert(0,'A')
+        >>> my_seq
+        MutableSeq('AACTCGACGTCG')
+        >>> my_seq.insert(8,'G')
+        >>> my_seq
+        MutableSeq('AACTCGACGGTCG')
+
+        No return value.
+        """
+        self._data.insert(i, ord(c.encode("ASCII")))
+
+    def pop(self, i=-1):
+        """Remove and return a single letter at the given index.
+
+        >>> my_seq = MutableSeq('ACTCGACGTCG')
+        >>> my_seq.pop()
+        'G'
+        >>> my_seq
+        MutableSeq('ACTCGACGTC')
+        >>> my_seq.pop()
+        'C'
+        >>> my_seq
+        MutableSeq('ACTCGACGT')
+
+        Returns the removed character (the last one by default).
+        """
+        c = self._data[i]
+        del self._data[i]
+        return chr(c)
+
+    def remove(self, item):
+        """Remove the first occurrence of a single letter from the mutable sequence.
+
+        >>> my_seq = MutableSeq('ACTCGACGTCG')
+        >>> my_seq.remove('C')
+        >>> my_seq
+        MutableSeq('ATCGACGTCG')
+        >>> my_seq.remove('A')
+        >>> my_seq
+        MutableSeq('TCGACGTCG')
+
+        No return value.
+        """
+        codepoint = ord(item)
+        try:
+            self._data.remove(codepoint)
+        except ValueError:
+            raise ValueError("value not found in MutableSeq") from None
+
+    def reverse(self):
+        """Modify the mutable sequence to reverse itself.
+
+        No return value.
+        """
+        self._data.reverse()
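+
+    # Illustrative usage (a sketch; reverse() mutates and returns None):
+    #     >>> m = MutableSeq("ACGT")
+    #     >>> m.reverse()
+    #     >>> m
+    #     MutableSeq('TGCA')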
+
+    def complement(self):
+        """Modify the mutable sequence to take on its complement.
+
+        No return value.
+
+        If the sequence contains neither T nor U, DNA is assumed
+        and any A will be mapped to T.
+
+        If the sequence contains both T and U, an exception is raised.
+        """
+        if ord("U") in self._data and ord("T") in self._data:
+            raise ValueError("Mixed RNA/DNA found")
+        elif ord("U") in self._data:
+            table = _rna_complement_table
+        else:
+            table = _dna_complement_table
+        self._data = self._data.translate(table)
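+
+    # Illustrative usage (a sketch; complement() works in place):
+    #     >>> m = MutableSeq("ATGC")
+    #     >>> m.complement()
+    #     >>> m
+    #     MutableSeq('TACG')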
+
+    def reverse_complement(self):
+        """Modify the mutable sequence to take on its reverse complement.
+
+        No return value.
+        """
+        self.complement()
+        self._data.reverse()
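+
+    # Illustrative usage (a sketch; the complement is taken, then reversed):
+    #     >>> m = MutableSeq("ATGC")
+    #     >>> m.reverse_complement()
+    #     >>> m
+    #     MutableSeq('GCAT')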
+
+    def extend(self, other):
+        """Add a sequence to the original mutable sequence object.
+
+        >>> my_seq = MutableSeq('ACTCGACGTCG')
+        >>> my_seq.extend('A')
+        >>> my_seq
+        MutableSeq('ACTCGACGTCGA')
+        >>> my_seq.extend('TTT')
+        >>> my_seq
+        MutableSeq('ACTCGACGTCGATTT')
+
+        No return value.
+        """
+        if isinstance(other, MutableSeq):
+            self._data.extend(other._data)
+        elif isinstance(other, Seq):
+            self._data.extend(bytes(other))
+        elif isinstance(other, str):
+            self._data.extend(other.encode("ASCII"))
+        else:
+            raise TypeError("expected a string, Seq or MutableSeq")
+
+    def toseq(self):
+        """Return the full sequence as a new immutable Seq object.
+
+        >>> from Bio.Seq import MutableSeq
+        >>> my_mseq = MutableSeq("MKQHKAMIVALIVICITAVVAAL")
+        >>> my_mseq
+        MutableSeq('MKQHKAMIVALIVICITAVVAAL')
+        >>> my_mseq.toseq()
+        Seq('MKQHKAMIVALIVICITAVVAAL')
+        """
+        warnings.warn(
+            "myseq.toseq() is deprecated; please use Seq(myseq) instead.",
+            BiopythonDeprecationWarning,
+        )
+        return Seq(self)
+
+
+class UndefinedSequenceError(ValueError):
+    """Sequence content is undefined."""
+
+
+class _UndefinedSequenceData(SequenceDataAbstractBaseClass):
+    """Stores the length of a sequence with undefined sequence contents (PRIVATE).
+
+    Objects of this class can be used to create a Seq object to represent
+    sequences with a known length, but an unknown sequence contents.
+    Calling __len__ returns the sequence length. Indexing with an integer
+    raises an UndefinedSequenceError (a ValueError subclass), while slicing
+    returns a new _UndefinedSequenceData of the requested size, except for
+    zero-size requests, which return an empty bytes object.
+    """
+
+    __slots__ = ("_length",)
+
+    def __init__(self, length):
+        """Initialize the object with the sequence length."""
+        if length < 0:
+            raise ValueError("Length must not be negative.")
+        self._length = length
+        super().__init__()
+
+    def __getitem__(self, key):
+        if isinstance(key, slice):
+            start, end, step = key.indices(self._length)
+            size = len(range(start, end, step))
+            if size == 0:
+                return b""
+            return _UndefinedSequenceData(size)
+        else:
+            raise UndefinedSequenceError("Sequence content is undefined")
+
+    def __len__(self):
+        return self._length
+
+    def __bytes__(self):
+        if self._length == 0:
+            return b""
+        raise UndefinedSequenceError("Sequence content is undefined")
+
+    def __add__(self, other):
+        if isinstance(other, _UndefinedSequenceData):
+            return _UndefinedSequenceData(self._length + other._length)
+        raise TypeError
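+
+# Illustrative behaviour (a sketch, assuming the slicing and addition
+# support defined above):
+#     >>> undefined = Seq(None, length=20)
+#     >>> undefined[5:10]
+#     Seq(None, length=5)
+#     >>> len(undefined + Seq(None, length=5))
+#     25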
+
+
+# The transcribe, back_transcribe, and translate functions are
+# user-friendly versions of the corresponding Seq/MutableSeq methods.
+# The functions work both on Seq objects, and on strings.
+
+
+def transcribe(dna):
+    """Transcribe a DNA sequence into RNA.
+
+    If given a string, returns a new string object.
+
+    Given a Seq or MutableSeq, returns a new Seq object.
+
+    e.g.
+
+    >>> transcribe("ACTGN")
+    'ACUGN'
+    """
+    if isinstance(dna, Seq):
+        return dna.transcribe()
+    elif isinstance(dna, MutableSeq):
+        return Seq(dna).transcribe()
+    else:
+        return dna.replace("T", "U").replace("t", "u")
+
+
+def back_transcribe(rna):
+    """Return the RNA sequence back-transcribed into DNA.
+
+    If given a string, returns a new string object.
+
+    Given a Seq or MutableSeq, returns a new Seq object.
+
+    e.g.
+
+    >>> back_transcribe("ACUGN")
+    'ACTGN'
+    """
+    if isinstance(rna, Seq):
+        return rna.back_transcribe()
+    elif isinstance(rna, MutableSeq):
+        return Seq(rna).back_transcribe()
+    else:
+        return rna.replace("U", "T").replace("u", "t")
+
+
+def _translate_str(
+    sequence, table, stop_symbol="*", to_stop=False, cds=False, pos_stop="X", gap=None
+):
+    """Translate nucleotide string into a protein string (PRIVATE).
+
+    Arguments:
+     - sequence - a string
+     - table - Which codon table to use?  This can be either a name (string),
+       an NCBI identifier (integer), or a CodonTable object (useful for
+       non-standard genetic codes).  This defaults to the "Standard" table.
+     - stop_symbol - a single character string, what to use for terminators.
+     - to_stop - boolean, should translation terminate at the first
+       in frame stop codon?  If there is no in-frame stop codon
+       then translation continues to the end.
+     - pos_stop - a single character string for a possible stop codon
+       (e.g. TAN or NNN)
+     - cds - Boolean, indicates this is a complete CDS.  If True, this
+       checks the sequence starts with a valid alternative start
+       codon (which will be translated as methionine, M), that the
+       sequence length is a multiple of three, and that there is a
+       single in frame stop codon at the end (this will be excluded
+       from the protein sequence, regardless of the to_stop option).
+       If these tests fail, an exception is raised.
+     - gap - Single character string to denote symbol used for gaps.
+       Defaults to None.
+
+    Returns a string.
+
+    e.g.
+
+    >>> from Bio.Data import CodonTable
+    >>> table = CodonTable.ambiguous_dna_by_id[1]
+    >>> _translate_str("AAA", table)
+    'K'
+    >>> _translate_str("TAR", table)
+    '*'
+    >>> _translate_str("TAN", table)
+    'X'
+    >>> _translate_str("TAN", table, pos_stop="@")
+    '@'
+    >>> _translate_str("TA?", table)
+    Traceback (most recent call last):
+       ...
+    Bio.Data.CodonTable.TranslationError: Codon 'TA?' is invalid
+
+    In a change to older versions of Biopython, partial codons are now
+    always regarded as an error (previously only checked if cds=True)
+    and will trigger a warning (likely to become an exception in a
+    future release).
+
+    If **cds=True**, the start and stop codons are checked, and the start
+    codon will be translated as methionine. The sequence must be a whole
+    number of codons.
+
+    >>> _translate_str("ATGCCCTAG", table, cds=True)
+    'MP'
+    >>> _translate_str("AAACCCTAG", table, cds=True)
+    Traceback (most recent call last):
+       ...
+    Bio.Data.CodonTable.TranslationError: First codon 'AAA' is not a start codon
+    >>> _translate_str("ATGCCCTAGCCCTAG", table, cds=True)
+    Traceback (most recent call last):
+       ...
+    Bio.Data.CodonTable.TranslationError: Extra in frame stop codon found.
+    """
+    try:
+        table_id = int(table)
+    except ValueError:
+        # Assume it's a table name
+        # The same table can be used for RNA or DNA
+        codon_table = CodonTable.ambiguous_generic_by_name[table]
+    except (AttributeError, TypeError):
+        # Assume it's a CodonTable object
+        if isinstance(table, CodonTable.CodonTable):
+            codon_table = table
+        else:
+            raise ValueError("Bad table argument") from None
+    else:
+        # Assume it's a table ID
+        # The same table can be used for RNA or DNA
+        codon_table = CodonTable.ambiguous_generic_by_id[table_id]
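+    # Whichever branch ran, codon_table is now a CodonTable object, whether
+    # the caller passed a table name ("Standard"), an NCBI id (1), or a
+    # CodonTable instance.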
+    sequence = sequence.upper()
+    amino_acids = []
+    forward_table = codon_table.forward_table
+    stop_codons = codon_table.stop_codons
+    if codon_table.nucleotide_alphabet is not None:
+        valid_letters = set(codon_table.nucleotide_alphabet.upper())
+    else:
+        # Assume the worst case, ambiguous DNA or RNA:
+        valid_letters = set(
+            IUPACData.ambiguous_dna_letters.upper()
+            + IUPACData.ambiguous_rna_letters.upper()
+        )
+    n = len(sequence)
+
+    # Check for tables with 'ambiguous' (dual-coding) stop codons:
+    dual_coding = [c for c in stop_codons if c in forward_table]
+    if dual_coding:
+        c = dual_coding[0]
+        if to_stop:
+            raise ValueError(
+                "You cannot use 'to_stop=True' with this table as it contains"
+                f" {len(dual_coding)} codon(s) which can be both STOP and an"
+                f" amino acid (e.g. '{c}' -> '{forward_table[c]}' or STOP)."
+            )
+        warnings.warn(
+            f"This table contains {len(dual_coding)} codon(s) which code(s) for"
+            f" both STOP and an amino acid (e.g. '{c}' -> '{forward_table[c]}'"
+            " or STOP). Such codons will be translated as amino acid.",
+            BiopythonWarning,
+        )
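+        # (NCBI table 27 is one such table: TGA can read as STOP or as Trp;
+        # see the translate() docstring below.)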
+
+    if cds:
+        if str(sequence[:3]).upper() not in codon_table.start_codons:
+            raise CodonTable.TranslationError(
+                f"First codon '{sequence[:3]}' is not a start codon"
+            )
+        if n % 3 != 0:
+            raise CodonTable.TranslationError(
+                f"Sequence length {n} is not a multiple of three"
+            )
+        if str(sequence[-3:]).upper() not in stop_codons:
+            raise CodonTable.TranslationError(
+                f"Final codon '{sequence[-3:]}' is not a stop codon"
+            )
+        # Don't translate the stop symbol, and manually translate the M
+        sequence = sequence[3:-3]
+        n -= 6
+        amino_acids = ["M"]
+    elif n % 3 != 0:
+        warnings.warn(
+            "Partial codon, len(sequence) not a multiple of three. "
+            "Explicitly trim the sequence or add trailing N before "
+            "translation. This may become an error in future.",
+            BiopythonWarning,
+        )
+    if gap is not None:
+        if not isinstance(gap, str):
+            raise TypeError("Gap character should be a single character string.")
+        elif len(gap) > 1:
+            raise ValueError("Gap character should be a single character string.")
+
+    for i in range(0, n - n % 3, 3):
+        codon = sequence[i : i + 3]
+        try:
+            amino_acids.append(forward_table[codon])
+        except (KeyError, CodonTable.TranslationError):
+            if codon in codon_table.stop_codons:
+                if cds:
+                    raise CodonTable.TranslationError(
+                        "Extra in frame stop codon found."
+                    ) from None
+                if to_stop:
+                    break
+                amino_acids.append(stop_symbol)
+            elif valid_letters.issuperset(set(codon)):
+                # Possible stop codon (e.g. NNN or TAN)
+                amino_acids.append(pos_stop)
+            elif gap is not None and codon == gap * 3:
+                # Gapped translation
+                amino_acids.append(gap)
+            else:
+                raise CodonTable.TranslationError(
+                    f"Codon '{codon}' is invalid"
+                ) from None
+    return "".join(amino_acids)
+
+
+def translate(
+    sequence, table="Standard", stop_symbol="*", to_stop=False, cds=False, gap=None
+):
+    """Translate a nucleotide sequence into amino acids.
+
+    If given a string, returns a new string object. Given a Seq or
+    MutableSeq, returns a Seq object.
+
+    Arguments:
+     - table - Which codon table to use?  This can be either a name
+       (string), an NCBI identifier (integer), or a CodonTable object
+       (useful for non-standard genetic codes).  Defaults to the "Standard"
+       table.
+     - stop_symbol - Single character string, what to use for any
+       terminators, defaults to the asterisk, "*".
+     - to_stop - Boolean, defaults to False meaning do a full
+       translation continuing on past any stop codons
+       (translated as the specified stop_symbol).  If
+       True, translation is terminated at the first in
+       frame stop codon (and the stop_symbol is not
+       appended to the returned protein sequence).
+     - cds - Boolean, indicates this is a complete CDS.  If True, this
+       checks the sequence starts with a valid alternative start
+       codon (which will be translated as methionine, M), that the
+       sequence length is a multiple of three, and that there is a
+       single in frame stop codon at the end (this will be excluded
+       from the protein sequence, regardless of the to_stop option).
+       If these tests fail, an exception is raised.
+     - gap - Single character string to denote symbol used for gaps.
+       Defaults to None.
+
+    A simple string example using the default (standard) genetic code:
+
+    >>> coding_dna = "GTGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG"
+    >>> translate(coding_dna)
+    'VAIVMGR*KGAR*'
+    >>> translate(coding_dna, stop_symbol="@")
+    'VAIVMGR@KGAR@'
+    >>> translate(coding_dna, to_stop=True)
+    'VAIVMGR'
+
+    Now using NCBI table 2, where TGA is not a stop codon:
+
+    >>> translate(coding_dna, table=2)
+    'VAIVMGRWKGAR*'
+    >>> translate(coding_dna, table=2, to_stop=True)
+    'VAIVMGRWKGAR'
+
+    In fact this example uses an alternative start codon valid under NCBI
+    table 2, GTG, which means this example is a complete valid CDS which
+    when translated should really start with methionine (not valine):
+
+    >>> translate(coding_dna, table=2, cds=True)
+    'MAIVMGRWKGAR'
+
+    Note that if the sequence has no in-frame stop codon, then the to_stop
+    argument has no effect:
+
+    >>> coding_dna2 = "GTGGCCATTGTAATGGGCCGC"
+    >>> translate(coding_dna2)
+    'VAIVMGR'
+    >>> translate(coding_dna2, to_stop=True)
+    'VAIVMGR'
+
+    NOTE - Ambiguous codons like "TAN" or "NNN" could be an amino acid
+    or a stop codon.  These are translated as "X".  Any invalid codon
+    (e.g. "TA?" or "T-A") will throw a TranslationError.
+
+    It will however translate either DNA or RNA.
+
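+    If a gap character is declared, a codon consisting entirely of that
+    character is passed through as a single gap in the protein (an
+    illustrative example):
+
+    >>> translate("GTG---GCCATT", gap="-")
+    'V-AI'
+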
+    NOTE - Since version 1.71 Biopython contains codon tables with 'ambiguous
+    stop codons'. These are stop codons with unambiguous sequence but which
+    have a context dependent coding as STOP or as amino acid. With these tables
+    'to_stop' must be False (otherwise a ValueError is raised). The dual
+    coding codons will always be translated as amino acid, except for
+    'cds=True', where the last codon will be translated as STOP.
+
+    >>> coding_dna3 = "ATGGCACGGAAGTGA"
+    >>> translate(coding_dna3)
+    'MARK*'
+
+    >>> translate(coding_dna3, table=27)  # Table 27: TGA -> STOP or W
+    'MARKW'
+
+    It will however raise a BiopythonWarning (not shown).
+
+    >>> translate(coding_dna3, table=27, cds=True)
+    'MARK'
+
+    >>> translate(coding_dna3, table=27, to_stop=True)
+    Traceback (most recent call last):
+       ...
+    ValueError: You cannot use 'to_stop=True' with this table ...
+    """
+    if isinstance(sequence, Seq):
+        return sequence.translate(table, stop_symbol, to_stop, cds)
+    elif isinstance(sequence, MutableSeq):
+        # Return a Seq object
+        return Seq(sequence).translate(table, stop_symbol, to_stop, cds)
+    else:
+        # Assume it's a string, return a string
+        return _translate_str(sequence, table, stop_symbol, to_stop, cds, gap=gap)
+
+
+def reverse_complement(sequence):
+    """Return the reverse complement sequence of a nucleotide string.
+
+    If given a string, returns a new string object.
+    Given a Seq or a MutableSeq, returns a new Seq object.
+
+    Supports unambiguous and ambiguous nucleotide sequences.
+
+    e.g.
+
+    >>> reverse_complement("ACTG-NH")
+    'DN-CAGT'
+
+    If neither T nor U is present, DNA is assumed and A is mapped to T:
+
+    >>> reverse_complement("A")
+    'T'
+    """
+    return complement(sequence)[::-1]
+
+
+def complement(sequence):
+    """Return the complement sequence of a DNA string.
+
+    If given a string, returns a new string object.
+
+    Given a Seq or a MutableSeq, returns a new Seq object.
+
+    Supports unambiguous and ambiguous nucleotide sequences.
+
+    e.g.
+
+    >>> complement("ACTG-NH")
+    'TGAC-ND'
+
+    If neither T nor U is present, DNA is assumed and A is mapped to T:
+
+    >>> complement("A")
+    'T'
+
+    However, this may not be supported in future. Please use the
+    complement_rna function if you have RNA.
+    """
+    if isinstance(sequence, Seq):
+        # Return a Seq
+        return sequence.complement()
+    elif isinstance(sequence, MutableSeq):
+        # Return a Seq
+        # Don't use the MutableSeq reverse_complement method as it is
+        # 'in place'.
+        return Seq(sequence).complement()
+
+    # Assume it's a string.
+    # In order to avoid some code duplication, the old code would turn the
+    # string into a Seq, use the reverse_complement method, and convert back
+    # to a string.
+    # This worked, but is over five times slower on short sequences!
+    sequence = sequence.encode("ASCII")
+    if (b"U" in sequence or b"u" in sequence) and (
+        b"T" in sequence or b"t" in sequence
+    ):  # ugly but this is what black wants
+        raise ValueError("Mixed RNA/DNA found")
+    elif b"U" in sequence or b"u" in sequence:
+        # TODO - warning or exception in future?
+        ttable = _rna_complement_table
+    else:
+        ttable = _dna_complement_table
+    sequence = sequence.translate(ttable)
+    return sequence.decode("ASCII")
+
+
+def complement_rna(sequence):
+    """Return the complement sequence of an RNA string.
+
+    >>> complement("ACG")  # assumed DNA
+    'TGC'
+    >>> complement_rna("ACG")
+    'UGC'
+
+    Any T in the sequence is treated as a U.
+    """
+    if isinstance(sequence, Seq):
+        # Return a Seq
+        return sequence.complement_rna()
+    elif isinstance(sequence, MutableSeq):
+        # Return a Seq
+        return Seq(sequence).complement_rna()
+    sequence = sequence.encode("ASCII")
+    sequence = sequence.translate(_rna_complement_table)
+    return sequence.decode("ASCII")
+
+
+def _test():
+    """Run the Bio.Seq module's doctests (PRIVATE)."""
+    print("Running doctests...")
+    import doctest
+
+    doctest.testmod(optionflags=doctest.IGNORE_EXCEPTION_DETAIL)
+    print("Done")
+
+
+if __name__ == "__main__":
+    _test()
diff --git a/code/lib/Bio/SeqFeature.py b/code/lib/Bio/SeqFeature.py
new file mode 100644
index 0000000..625154a
--- /dev/null
+++ b/code/lib/Bio/SeqFeature.py
@@ -0,0 +1,2224 @@
+# Copyright 2000-2003 Jeff Chang.
+# Copyright 2001-2008 Brad Chapman.
+# Copyright 2005-2016 by Peter Cock.
+# Copyright 2006-2009 Michiel de Hoon.
+# All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Represent a Sequence Feature holding info about a part of a sequence.
+
+This is heavily modeled after the Biocorba SeqFeature objects, and
+may be pretty biased towards GenBank stuff since I'm writing it
+for the GenBank parser output...
+
+What's here:
+
+Base class to hold a Feature
+----------------------------
+
+Classes:
+ - SeqFeature
+
+Hold information about a Reference
+----------------------------------
+
+This is an attempt to create a General class to hold Reference type
+information.
+
+Classes:
+ - Reference
+
+Specify locations of a feature on a Sequence
+--------------------------------------------
+
+This aims to handle, in Ewan Birney's words, 'the dreaded fuzziness issue'.
+This has the advantages of allowing us to handle fuzzy stuff in case anyone
+needs it, and also be compatible with BioPerl etc and BioSQL.
+
+Classes:
+ - FeatureLocation - Specify the start and end location of a feature.
+ - CompoundLocation - Collection of FeatureLocation objects (for joins etc).
+ - ExactPosition - Specify the position as being exact.
+ - WithinPosition - Specify a position occurring within some range.
+ - BetweenPosition - Specify a position occurring between a range (OBSOLETE?).
+ - BeforePosition - Specify the position as being found before some base.
+ - AfterPosition - Specify the position as being found after some base.
+ - OneOfPosition - Specify a position where the location can be multiple positions.
+ - UncertainPosition - Specify a specific position which is uncertain.
+ - UnknownPosition - Represents missing information like '?' in UniProt.
+
+"""
+import functools
+
+from collections import OrderedDict
+
+from Bio.Seq import MutableSeq
+from Bio.Seq import reverse_complement
+from Bio.Seq import Seq
+
+
+class SeqFeature:
+    """Represent a Sequence Feature on an object.
+
+    Attributes:
+     - location - the location of the feature on the sequence (FeatureLocation)
+     - type - the specified type of the feature (ie. CDS, exon, repeat...)
+     - location_operator - a string specifying how this SeqFeature may
+       be related to others. For example, in the example GenBank feature
+       shown below, the location_operator would be "join". This is a proxy
+       for feature.location.operator and only applies to compound locations.
+     - strand - A value specifying on which strand (of a DNA sequence, for
+       instance) the feature deals with. 1 indicates the plus strand, -1
+       indicates the minus strand, 0 indicates stranded but unknown (? in GFF3),
+       while the default of None indicates that strand doesn't apply (dot in GFF3,
+       e.g. features on proteins). Note this is a shortcut for accessing the
+       strand property of the feature's location.
+     - id - A string identifier for the feature.
+     - ref - A reference to another sequence. This could be an accession
+       number for some different sequence. Note this is a shortcut for the
+       reference property of the feature's location.
+     - ref_db - A different database for the reference accession number.
+       Note this is a shortcut for the reference property of the location
+     - qualifiers - A dictionary of qualifiers on the feature. These are
+       analogous to the qualifiers from a GenBank feature table. The keys of
+       the dictionary are qualifier names, the values are the qualifier
+       values. As of Biopython 1.69 this is an ordered dictionary.
+
+    """
+
+    def __init__(
+        self,
+        location=None,
+        type="",
+        location_operator="",
+        strand=None,
+        id="",
+        qualifiers=None,
+        sub_features=None,
+        ref=None,
+        ref_db=None,
+    ):
+        """Initialize a SeqFeature on a Sequence.
+
+        location can either be a FeatureLocation (with strand argument also
+        given if required), or None.
+
+        e.g. With no strand, on the forward strand, and on the reverse strand:
+
+        >>> from Bio.SeqFeature import SeqFeature, FeatureLocation
+        >>> f1 = SeqFeature(FeatureLocation(5, 10), type="domain")
+        >>> f1.strand == f1.location.strand == None
+        True
+        >>> f2 = SeqFeature(FeatureLocation(7, 110, strand=1), type="CDS")
+        >>> f2.strand == f2.location.strand == +1
+        True
+        >>> f3 = SeqFeature(FeatureLocation(9, 108, strand=-1), type="CDS")
+        >>> f3.strand == f3.location.strand == -1
+        True
+
+        An invalid strand will trigger an exception:
+
+        >>> f4 = SeqFeature(FeatureLocation(50, 60), strand=2)
+        Traceback (most recent call last):
+           ...
+        ValueError: Strand should be +1, -1, 0 or None, not 2
+
+        Similarly if set via the FeatureLocation directly:
+
+        >>> loc4 = FeatureLocation(50, 60, strand=2)
+        Traceback (most recent call last):
+           ...
+        ValueError: Strand should be +1, -1, 0 or None, not 2
+
+        For exact start/end positions, an integer can be used (as shown above)
+        as shorthand for the ExactPosition object. For non-exact locations, the
+        FeatureLocation must be specified via the appropriate position objects.
+
+        Note that the strand, ref and ref_db arguments to the SeqFeature are
+        now obsolete and will be deprecated in a future release (which will
+        give warning messages) and later removed. Set them via the location
+        object instead.
+
+        Note that location_operator and sub_features arguments can no longer
+        be used, instead do this via the CompoundLocation object.
+        """
+        if (
+            location is not None
+            and not isinstance(location, FeatureLocation)
+            and not isinstance(location, CompoundLocation)
+        ):
+            raise TypeError(
+                "FeatureLocation, CompoundLocation (or None) required for the location"
+            )
+        self.location = location
+        self.type = type
+        if location_operator:
+            # TODO - Deprecation warning
+            self.location_operator = location_operator
+        if strand is not None:
+            # TODO - Deprecation warning
+            self.strand = strand
+        self.id = id
+        if qualifiers is None:
+            qualifiers = OrderedDict()
+        self.qualifiers = qualifiers
+        if sub_features is not None:
+            raise TypeError("Rather than sub_features, use a CompoundFeatureLocation")
+        if ref is not None:
+            # TODO - Deprecation warning
+            self.ref = ref
+        if ref_db is not None:
+            # TODO - Deprecation warning
+            self.ref_db = ref_db
+
+    def _get_strand(self):
+        """Get function for the strand property (PRIVATE)."""
+        return self.location.strand
+
+    def _set_strand(self, value):
+        """Set function for the strand property (PRIVATE)."""
+        try:
+            self.location.strand = value
+        except AttributeError:
+            if self.location is None:
+                if value is not None:
+                    raise ValueError("Can't set strand without a location.") from None
+            else:
+                raise
+
+    strand = property(
+        fget=_get_strand,
+        fset=_set_strand,
+        doc="""Feature's strand
+
+                          This is a shortcut for feature.location.strand
+                          """,
+    )
+
+    def _get_ref(self):
+        """Get function for the reference property (PRIVATE)."""
+        try:
+            return self.location.ref
+        except AttributeError:
+            return None
+
+    def _set_ref(self, value):
+        """Set function for the reference property (PRIVATE)."""
+        try:
+            self.location.ref = value
+        except AttributeError:
+            if self.location is None:
+                if value is not None:
+                    raise ValueError("Can't set ref without a location.") from None
+            else:
+                raise
+
+    ref = property(
+        fget=_get_ref,
+        fset=_set_ref,
+        doc="""Feature location reference (e.g. accession).
+
+                       This is a shortcut for feature.location.ref
+                       """,
+    )
+
+    def _get_ref_db(self):
+        """Get function for the database reference property (PRIVATE)."""
+        try:
+            return self.location.ref_db
+        except AttributeError:
+            return None
+
+    def _set_ref_db(self, value):
+        """Set function for the database reference property (PRIVATE)."""
+        self.location.ref_db = value
+
+    ref_db = property(
+        fget=_get_ref_db,
+        fset=_set_ref_db,
+        doc="""Feature location reference's database.
+
+                          This is a shortcut for feature.location.ref_db
+                          """,
+    )
+
+    def _get_location_operator(self):
+        """Get function for the location operator property (PRIVATE)."""
+        try:
+            return self.location.operator
+        except AttributeError:
+            return None
+
+    def _set_location_operator(self, value):
+        """Set function for the location operator property (PRIVATE)."""
+        if value:
+            if isinstance(self.location, CompoundLocation):
+                self.location.operator = value
+            elif self.location is None:
+                raise ValueError(
+                    "Location is None so can't set its operator (to %r)" % value
+                )
+            else:
+                raise ValueError("Only CompoundLocation gets an operator (%r)" % value)
+
+    location_operator = property(
+        fget=_get_location_operator,
+        fset=_set_location_operator,
+        doc="Location operator for compound locations (e.g. join).",
+    )
+
+    def __repr__(self):
+        """Represent the feature as a string for debugging."""
+        answer = "%s(%r" % (self.__class__.__name__, self.location)
+        if self.type:
+            answer += ", type=%r" % self.type
+        if self.location_operator:
+            answer += ", location_operator=%r" % self.location_operator
+        if self.id and self.id != "":
+            answer += ", id=%r" % self.id
+        if self.ref:
+            answer += ", ref=%r" % self.ref
+        if self.ref_db:
+            answer += ", ref_db=%r" % self.ref_db
+        answer += ")"
+        return answer
+
+    def __str__(self):
+        """Return the full feature as a python string."""
+        out = "type: %s\n" % self.type
+        out += "location: %s\n" % self.location
+        if self.id and self.id != "":
+            out += "id: %s\n" % self.id
+        out += "qualifiers:\n"
+        for qual_key in sorted(self.qualifiers):
+            out += "    Key: %s, Value: %s\n" % (qual_key, self.qualifiers[qual_key])
+        return out
+
+    def _shift(self, offset):
+        """Return a copy of the feature with its location shifted (PRIVATE).
+
+        The annotation qualifiers are copied.
+        """
+        return SeqFeature(
+            location=self.location._shift(offset),
+            type=self.type,
+            location_operator=self.location_operator,
+            id=self.id,
+            qualifiers=OrderedDict(self.qualifiers.items()),
+        )
+
+    def _flip(self, length):
+        """Return a copy of the feature with its location flipped (PRIVATE).
+
+        The argument length gives the length of the parent sequence. For
+        example a location 0..20 (+1 strand) with parent length 30 becomes
+        after flipping 10..30 (-1 strand). Strandless (None) or unknown
+        strand (0) remain like that - just their end points are changed.
+
+        The annotation qualifiers are copied.
+        """
+        return SeqFeature(
+            location=self.location._flip(length),
+            type=self.type,
+            location_operator=self.location_operator,
+            id=self.id,
+            qualifiers=OrderedDict(self.qualifiers.items()),
+        )
+
+    def extract(self, parent_sequence, references=None):
+        """Extract the feature's sequence from supplied parent sequence.
+
+        The parent_sequence can be a Seq like object or a string, and will
+        generally return an object of the same type. The exception is a
+        MutableSeq parent sequence, which returns a Seq object.
+
+        This should cope with complex locations including complements, joins
+        and fuzzy positions. Even mixed strand features should work! This
+        also covers features on protein sequences (e.g. domains), although
+        here reverse strand features are not permitted. If the
+        location refers to other records, they must be supplied in the
+        optional dictionary references.
+
+        >>> from Bio.Seq import Seq
+        >>> from Bio.SeqFeature import SeqFeature, FeatureLocation
+        >>> seq = Seq("MKQHKAMIVALIVICITAVVAAL")
+        >>> f = SeqFeature(FeatureLocation(8, 15), type="domain")
+        >>> f.extract(seq)
+        Seq('VALIVIC')
+
+        If the FeatureLocation is None, e.g. when parsing invalid locus
+        locations in the GenBank parser, extract() will raise a ValueError.
+
+        >>> from Bio.Seq import Seq
+        >>> from Bio.SeqFeature import SeqFeature
+        >>> seq = Seq("MKQHKAMIVALIVICITAVVAAL")
+        >>> f = SeqFeature(None, type="domain")
+        >>> f.extract(seq)
+        Traceback (most recent call last):
+           ...
+        ValueError: The feature's .location is None. Check the sequence file for a valid location.
+
+        Note - currently only compound features of type "join" are supported.
+        """
+        if self.location is None:
+            raise ValueError(
+                "The feature's .location is None. Check the "
+                "sequence file for a valid location."
+            )
+        return self.location.extract(parent_sequence, references=references)
+
+    def translate(
+        self,
+        parent_sequence,
+        table="Standard",
+        start_offset=None,
+        stop_symbol="*",
+        to_stop=False,
+        cds=None,
+        gap=None,
+    ):
+        """Get a translation of the feature's sequence.
+
+        This method is intended for CDS or other features that code proteins
+        and is a shortcut that will both extract the feature and
+        translate it, taking into account the codon_start and transl_table
+        qualifiers, if they are present. If they are not present the
+        value of the arguments "table" and "start_offset" are used.
+
+        The "cds" parameter is set to "True" if the feature is of type
+        "CDS" but can be overridden by giving an explicit argument.
+
+        The arguments stop_symbol, to_stop and gap have the same meaning
+        as Seq.translate, refer to that documentation for further information.
+
+        Arguments:
+         - parent_sequence - A DNA or RNA sequence.
+         - table - Which codon table to use if there is no transl_table
+           qualifier for this feature. This can be either a name
+           (string), an NCBI identifier (integer), or a CodonTable
+           object (useful for non-standard genetic codes).  This
+           defaults to the "Standard" table.
+         - start_offset - offset at which the first complete codon of a
+           coding feature can be found, relative to the first base of
+           that feature. Has a valid value of 0, 1 or 2. NOTE: this
+           uses python's 0-based numbering whereas the codon_start
+           qualifier in files from NCBI uses 1-based numbering.
+           This argument overrides any codon_start qualifier.
+
+        >>> from Bio.Seq import Seq
+        >>> from Bio.SeqFeature import SeqFeature, FeatureLocation
+        >>> seq = Seq("GGTTACACTTACCGATAATGTCTCTGATGA")
+        >>> f = SeqFeature(FeatureLocation(0, 30), type="CDS")
+        >>> f.qualifiers['transl_table'] = [11]
+
+        Note that features of type CDS are subject to the usual
+        checks at translation. But you can override this behaviour
+        by giving explicit arguments:
+
+        >>> f.translate(seq, cds=False)
+        Seq('GYTYR*CL**')
+
+        Now use the start_offset argument to change the frame. Note
+        this uses python 0-based numbering.
+
+        >>> f.translate(seq, start_offset=1, cds=False)
+        Seq('VTLTDNVSD')
+
+        Alternatively use the codon_start qualifier to do the same
+        thing. Note: this uses 1-based numbering, which is found
+        in files from NCBI.
+
+        >>> f.qualifiers['codon_start'] = [2]
+        >>> f.translate(seq, cds=False)
+        Seq('VTLTDNVSD')
+        """
+        # see if this feature should be translated in a different
+        # frame using the "codon_start" qualifier
+        if start_offset is None:
+            try:
+                start_offset = int(self.qualifiers["codon_start"][0]) - 1
+            except KeyError:
+                start_offset = 0
+
+        if start_offset not in [0, 1, 2]:
+            raise ValueError(
+                "The start_offset must be 0, 1, or 2. "
+                f"The supplied value is {start_offset}. "
+                "Check the value of either the codon_start qualifier "
+                "or the start_offset argument"
+            )
+
+        feat_seq = self.extract(parent_sequence)[start_offset:]
+        codon_table = self.qualifiers.get("transl_table", [table])[0]
+
+        if cds is None:
+            cds = self.type == "CDS"
+
+        return feat_seq.translate(
+            table=codon_table,
+            stop_symbol=stop_symbol,
+            to_stop=to_stop,
+            cds=cds,
+            gap=gap,
+        )
+
+    def __bool__(self):
+        """Boolean value of an instance of this class (True).
+
+        This behaviour is for backwards compatibility, since until the
+        __len__ method was added, a SeqFeature always evaluated as True.
+
+        Note that in comparison, Seq objects, strings, lists, etc, will all
+        evaluate to False if they have length zero.
+
+        WARNING: The SeqFeature may in future evaluate to False when its
+        length is zero (in order to better match normal python behaviour)!
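+
+        An illustrative check, even for a feature without a location:
+
+        >>> from Bio.SeqFeature import SeqFeature
+        >>> bool(SeqFeature(None, type="misc_feature"))
+        True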
+        """
+        return True
+
+    def __len__(self):
+        """Return the length of the region where the feature is located.
+
+        >>> from Bio.Seq import Seq
+        >>> from Bio.SeqFeature import SeqFeature, FeatureLocation
+        >>> seq = Seq("MKQHKAMIVALIVICITAVVAAL")
+        >>> f = SeqFeature(FeatureLocation(8, 15), type="domain")
+        >>> len(f)
+        7
+        >>> f.extract(seq)
+        Seq('VALIVIC')
+        >>> len(f.extract(seq))
+        7
+
+        This is a proxy for taking the length of the feature's location:
+
+        >>> len(f.location)
+        7
+
+        For simple features this is the same as the region spanned (end
+        position minus start position using Pythonic counting). However, for
+        a compound location (e.g. a CDS as the join of several exons) the
+        gaps are not counted (e.g. introns). This ensures that len(f) matches
+        len(f.extract(parent_seq)), and also makes sure things work properly
+        with features wrapping the origin etc.
+        """
+        return len(self.location)
+
+    def __iter__(self):
+        """Iterate over the parent positions within the feature.
+
+        The iteration order is strand aware, and can be thought of as moving
+        along the feature using the parent sequence coordinates:
+
+        >>> from Bio.SeqFeature import SeqFeature, FeatureLocation
+        >>> f = SeqFeature(FeatureLocation(5, 10), type="domain", strand=-1)
+        >>> len(f)
+        5
+        >>> for i in f: print(i)
+        9
+        8
+        7
+        6
+        5
+        >>> list(f)
+        [9, 8, 7, 6, 5]
+
+        This is a proxy for iterating over the location,
+
+        >>> list(f.location)
+        [9, 8, 7, 6, 5]
+        """
+        return iter(self.location)
+
+    def __contains__(self, value):
+        """Check if an integer position is within the feature.
+
+        >>> from Bio.SeqFeature import SeqFeature, FeatureLocation
+        >>> f = SeqFeature(FeatureLocation(5, 10), type="domain", strand=-1)
+        >>> len(f)
+        5
+        >>> [i for i in range(15) if i in f]
+        [5, 6, 7, 8, 9]
+
+        For example, to see which features include a SNP position, you could
+        use this:
+
+        >>> from Bio import SeqIO
+        >>> record = SeqIO.read("GenBank/NC_000932.gb", "gb")
+        >>> for f in record.features:
+        ...     if 1750 in f:
+        ...         print("%s %s" % (f.type, f.location))
+        source [0:154478](+)
+        gene [1716:4347](-)
+        tRNA join{[4310:4347](-), [1716:1751](-)}
+
+        Note that for a feature defined as a join of several subfeatures (e.g.
+        the union of several exons) the gaps are not checked (e.g. introns).
+        In this example, the tRNA location is defined in the GenBank file as
+        complement(join(1717..1751,4311..4347)), so that position 1760 falls
+        in the gap:
+
+        >>> for f in record.features:
+        ...     if 1760 in f:
+        ...         print("%s %s" % (f.type, f.location))
+        source [0:154478](+)
+        gene [1716:4347](-)
+
+        Note that additional care may be required with fuzzy locations, for
+        example just before a BeforePosition:
+
+        >>> from Bio.SeqFeature import SeqFeature, FeatureLocation
+        >>> from Bio.SeqFeature import BeforePosition
+        >>> f = SeqFeature(FeatureLocation(BeforePosition(3), 8), type="domain")
+        >>> len(f)
+        5
+        >>> [i for i in range(10) if i in f]
+        [3, 4, 5, 6, 7]
+
+        Note that this is a proxy for testing membership on the location.
+
+        >>> [i for i in range(10) if i in f.location]
+        [3, 4, 5, 6, 7]
+        """
+        return value in self.location
+
+
+# --- References
+
+
+# TODO -- Will this hold PubMed and Medline information decently?
+class Reference:
+    """Represent a Generic Reference object.
+
+    Attributes:
+     - location - A list of Location objects specifying regions of
+       the sequence that the references correspond to. If no locations are
+       specified, the entire sequence is assumed.
+     - authors - A big old string, or a list split by author, of authors
+       for the reference.
+     - title - The title of the reference.
+     - journal - Journal the reference was published in.
+     - medline_id - A medline reference for the article.
+     - pubmed_id - A pubmed reference for the article.
+     - comment - A place to stick any comments about the reference.
+
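+    A minimal illustrative sketch of typical use (hypothetical values):
+
+    >>> from Bio.SeqFeature import Reference
+    >>> ref = Reference()
+    >>> ref.title = "A study of zinc finger genes"
+    >>> ref.pubmed_id = "1234567"
+    >>> ref
+    Reference(title='A study of zinc finger genes', ...)
+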
+    """
+
+    def __init__(self):
+        """Initialize the class."""
+        self.location = []
+        self.authors = ""
+        self.consrtm = ""
+        self.title = ""
+        self.journal = ""
+        self.medline_id = ""
+        self.pubmed_id = ""
+        self.comment = ""
+
+    def __str__(self):
+        """Return the full Reference object as a python string."""
+        out = ""
+        for single_location in self.location:
+            out += "location: %s\n" % single_location
+        out += "authors: %s\n" % self.authors
+        if self.consrtm:
+            out += "consrtm: %s\n" % self.consrtm
+        out += "title: %s\n" % self.title
+        out += "journal: %s\n" % self.journal
+        out += "medline id: %s\n" % self.medline_id
+        out += "pubmed id: %s\n" % self.pubmed_id
+        out += "comment: %s\n" % self.comment
+        return out
+
+    def __repr__(self):
+        """Represent the Reference object as a string for debugging."""
+        # TODO - Update this if __init__ later accepts values
+        return "%s(title=%r, ...)" % (self.__class__.__name__, self.title)
+
+    def __eq__(self, other):
+        """Check if two Reference objects should be considered equal.
+
+        Note prior to Biopython 1.70 the location was not compared, as
+        until then __eq__ for the FeatureLocation class was not defined.
+        """
+        return (
+            self.authors == other.authors
+            and self.consrtm == other.consrtm
+            and self.title == other.title
+            and self.journal == other.journal
+            and self.medline_id == other.medline_id
+            and self.pubmed_id == other.pubmed_id
+            and self.comment == other.comment
+            and self.location == other.location
+        )
+
+
+# --- Handling feature locations
+
+
+class FeatureLocation:
+    """Specify the location of a feature along a sequence.
+
+    The FeatureLocation is used for simple continuous features, which can
+    be described as running from a start position to an end position
+    (optionally with a strand and reference information).  More complex
+    locations made up from several non-continuous parts (e.g. a coding
+    sequence made up of several exons) are described using a SeqFeature
+    with a CompoundLocation.
+
+    Note that the start and end location numbering follow Python's scheme,
+    thus a GenBank entry of 123..150 (one based counting) becomes a location
+    of [122:150] (zero based counting).
+
+    >>> from Bio.SeqFeature import FeatureLocation
+    >>> f = FeatureLocation(122, 150)
+    >>> print(f)
+    [122:150]
+    >>> print(f.start)
+    122
+    >>> print(f.end)
+    150
+    >>> print(f.strand)
+    None
+
+    Note the strand defaults to None. If you are working with nucleotide
+    sequences you'd want to be explicit if it is the forward strand:
+
+    >>> from Bio.SeqFeature import FeatureLocation
+    >>> f = FeatureLocation(122, 150, strand=+1)
+    >>> print(f)
+    [122:150](+)
+    >>> print(f.strand)
+    1
+
+    Note that for a parent sequence of length n, the FeatureLocation
+    start and end must satisfy the inequality 0 <= start <= end <= n.
+    This means even for features on the reverse strand of a nucleotide
+    sequence, we expect the 'start' coordinate to be less than the
+    'end'.
+
+    >>> from Bio.SeqFeature import FeatureLocation
+    >>> r = FeatureLocation(122, 150, strand=-1)
+    >>> print(r)
+    [122:150](-)
+    >>> print(r.start)
+    122
+    >>> print(r.end)
+    150
+    >>> print(r.strand)
+    -1
+
+    i.e. Rather than thinking of the 'start' and 'end' biologically in a
+    strand aware manner, think of them as the 'left most' or 'minimum'
+    boundary, and the 'right most' or 'maximum' boundary of the region
+    being described. This is particularly important with compound
+    locations describing non-continuous regions.
+
+    In the example above we have used standard exact positions, but there
+    are also specialised position objects used to represent fuzzy positions
+    as well, for example a GenBank location like complement(<123..150)
+    would use a BeforePosition object for the start.
+    """
+
+    def __init__(self, start, end, strand=None, ref=None, ref_db=None):
+        """Initialize the class.
+
+        start and end arguments specify the values where the feature begins
+        and ends. These can either by any of the ``*Position`` objects that
+        inherit from AbstractPosition, or can just be integers specifying the
+        position. In the case of integers, the values are assumed to be
+        exact and are converted in ExactPosition arguments. This is meant
+        to make it easy to deal with non-fuzzy ends.
+
+        i.e. Short form:
+
+        >>> from Bio.SeqFeature import FeatureLocation
+        >>> loc = FeatureLocation(5, 10, strand=-1)
+        >>> print(loc)
+        [5:10](-)
+
+        Explicit form:
+
+        >>> from Bio.SeqFeature import FeatureLocation, ExactPosition
+        >>> loc = FeatureLocation(ExactPosition(5), ExactPosition(10), strand=-1)
+        >>> print(loc)
+        [5:10](-)
+
+        Other fuzzy positions are used similarly,
+
+        >>> from Bio.SeqFeature import FeatureLocation
+        >>> from Bio.SeqFeature import BeforePosition, AfterPosition
+        >>> loc2 = FeatureLocation(BeforePosition(5), AfterPosition(10), strand=-1)
+        >>> print(loc2)
+        [<5:>10](-)
+
+        For nucleotide features you will also want to specify the strand,
+        use 1 for the forward (plus) strand, -1 for the reverse (negative)
+        strand, 0 for stranded but strand unknown (? in GFF3), or None for
+        when the strand does not apply (dot in GFF3), e.g. features on
+        proteins.
+
+        >>> loc = FeatureLocation(5, 10, strand=+1)
+        >>> print(loc)
+        [5:10](+)
+        >>> print(loc.strand)
+        1
+
+        Normally feature locations are given relative to the parent
+        sequence you are working with, but an explicit accession can
+        be given with the optional ref and db_ref strings:
+
+        >>> loc = FeatureLocation(105172, 108462, ref="AL391218.9", strand=1)
+        >>> print(loc)
+        AL391218.9[105172:108462](+)
+        >>> print(loc.ref)
+        AL391218.9
+
+        """
+        # TODO - Check 0 <= start <= end (<= length of reference)
+        if isinstance(start, AbstractPosition):
+            self._start = start
+        elif isinstance(start, int):
+            self._start = ExactPosition(start)
+        else:
+            raise TypeError("start=%r %s" % (start, type(start)))
+        if isinstance(end, AbstractPosition):
+            self._end = end
+        elif isinstance(end, int):
+            self._end = ExactPosition(end)
+        else:
+            raise TypeError("end=%r %s" % (end, type(end)))
+        if (
+            isinstance(self.start.position, int)
+            and isinstance(self.end.position, int)
+            and self.start > self.end
+        ):
+            raise ValueError(
+                f"End location ({self.end}) must be greater than "
+                f"or equal to start location ({self.start})"
+            )
+        self.strand = strand
+        self.ref = ref
+        self.ref_db = ref_db
+
+    def _get_strand(self):
+        """Get function for the strand property (PRIVATE)."""
+        return self._strand
+
+    def _set_strand(self, value):
+        """Set function for the strand property (PRIVATE)."""
+        if value not in [+1, -1, 0, None]:
+            raise ValueError("Strand should be +1, -1, 0 or None, not %r" % value)
+        self._strand = value
+
+    strand = property(
+        fget=_get_strand,
+        fset=_set_strand,
+        doc="Strand of the location (+1, -1, 0 or None).",
+    )
+
+    def __str__(self):
+        """Return a representation of the FeatureLocation object (with python counting).
+
+        For the simple case this uses the python splicing syntax, [122:150]
+        (zero based counting) which GenBank would call 123..150 (one based
+        counting).
+        """
+        answer = "[%s:%s]" % (self._start, self._end)
+        if self.ref and self.ref_db:
+            answer = "%s:%s%s" % (self.ref_db, self.ref, answer)
+        elif self.ref:
+            answer = self.ref + answer
+        # Is ref_db without ref meaningful?
+        if self.strand is None:
+            return answer
+        elif self.strand == +1:
+            return answer + "(+)"
+        elif self.strand == -1:
+            return answer + "(-)"
+        else:
+            # strand = 0, stranded but strand unknown, ? in GFF3
+            return answer + "(?)"
+
+    def __repr__(self):
+        """Represent the FeatureLocation object as a string for debugging."""
+        optional = ""
+        if self.strand is not None:
+            optional += ", strand=%r" % self.strand
+        if self.ref is not None:
+            optional += ", ref=%r" % self.ref
+        if self.ref_db is not None:
+            optional += ", ref_db=%r" % self.ref_db
+        return "%s(%r, %r%s)" % (
+            self.__class__.__name__,
+            self.start,
+            self.end,
+            optional,
+        )
+
+    def __add__(self, other):
+        """Combine location with another FeatureLocation object, or shift it.
+
+        You can add two feature locations to make a join CompoundLocation:
+
+        >>> from Bio.SeqFeature import FeatureLocation
+        >>> f1 = FeatureLocation(5, 10)
+        >>> f2 = FeatureLocation(20, 30)
+        >>> combined = f1 + f2
+        >>> print(combined)
+        join{[5:10], [20:30]}
+
+        This is thus equivalent to:
+
+        >>> from Bio.SeqFeature import CompoundLocation
+        >>> join = CompoundLocation([f1, f2])
+        >>> print(join)
+        join{[5:10], [20:30]}
+
+        You can also use sum(...) in this way:
+
+        >>> join = sum([f1, f2])
+        >>> print(join)
+        join{[5:10], [20:30]}
+
+        Furthermore, you can combine a FeatureLocation with a CompoundLocation
+        in this way.
+
+        Separately, adding an integer will give a new FeatureLocation with
+        its start and end offset by that amount. For example:
+
+        >>> print(f1)
+        [5:10]
+        >>> print(f1 + 100)
+        [105:110]
+        >>> print(200 + f1)
+        [205:210]
+
+        This can be useful when editing annotation.
+        """
+        if isinstance(other, FeatureLocation):
+            return CompoundLocation([self, other])
+        elif isinstance(other, int):
+            return self._shift(other)
+        else:
+            # This will allow CompoundLocation's __radd__ to be called:
+            return NotImplemented
+
+    def __radd__(self, other):
+        """Add a feature locationanother FeatureLocation object to the left."""
+        if isinstance(other, int):
+            return self._shift(other)
+        else:
+            return NotImplemented
+
+    def __bool__(self):
+        """Return True regardless of the length of the feature.
+
+        This behaviour is for backwards compatibility, since until the
+        __len__ method was added, a FeatureLocation always evaluated as True.
+
+        Note that in comparison, Seq objects, strings, lists, etc, will all
+        evaluate to False if they have length zero.
+
+        WARNING: The FeatureLocation may in future evaluate to False when its
+        length is zero (in order to better match normal python behaviour)!
+        """
+        return True
+
+    def __len__(self):
+        """Return the length of the region described by the FeatureLocation object.
+
+        Note that extra care may be needed for fuzzy locations, e.g.
+
+        >>> from Bio.SeqFeature import FeatureLocation
+        >>> from Bio.SeqFeature import BeforePosition, AfterPosition
+        >>> loc = FeatureLocation(BeforePosition(5), AfterPosition(10))
+        >>> len(loc)
+        5
+        """
+        return int(self._end) - int(self._start)
+
+    def __contains__(self, value):
+        """Check if an integer position is within the FeatureLocation object.
+
+        Note that extra care may be needed for fuzzy locations, e.g.
+
+        >>> from Bio.SeqFeature import FeatureLocation
+        >>> from Bio.SeqFeature import BeforePosition, AfterPosition
+        >>> loc = FeatureLocation(BeforePosition(5), AfterPosition(10))
+        >>> len(loc)
+        5
+        >>> [i for i in range(15) if i in loc]
+        [5, 6, 7, 8, 9]
+        """
+        if not isinstance(value, int):
+            raise ValueError(
+                "Currently we only support checking for integer "
+                "positions being within a FeatureLocation."
+            )
+        if value < self._start or value >= self._end:
+            return False
+        else:
+            return True
+
+    def __iter__(self):
+        """Iterate over the parent positions within the FeatureLocation object.
+
+        >>> from Bio.SeqFeature import FeatureLocation
+        >>> from Bio.SeqFeature import BeforePosition, AfterPosition
+        >>> loc = FeatureLocation(BeforePosition(5), AfterPosition(10))
+        >>> len(loc)
+        5
+        >>> for i in loc: print(i)
+        5
+        6
+        7
+        8
+        9
+        >>> list(loc)
+        [5, 6, 7, 8, 9]
+        >>> [i for i in range(15) if i in loc]
+        [5, 6, 7, 8, 9]
+
+        Note this is strand aware:
+
+        >>> loc = FeatureLocation(BeforePosition(5), AfterPosition(10), strand = -1)
+        >>> list(loc)
+        [9, 8, 7, 6, 5]
+        """
+        if self.strand == -1:
+            yield from range(self._end - 1, self._start - 1, -1)
+        else:
+            yield from range(self._start, self._end)
+
+    def __eq__(self, other):
+        """Implement equality by comparing all the location attributes."""
+        if not isinstance(other, FeatureLocation):
+            return False
+        return (
+            self._start == other.start
+            and self._end == other.end
+            and self._strand == other.strand
+            and self.ref == other.ref
+            and self.ref_db == other.ref_db
+        )
+
+    def _shift(self, offset):
+        """Return a copy of the FeatureLocation shifted by an offset (PRIVATE).
+
+        Returns self when location is relative to an external reference.
+        """
+        # TODO - What if offset is a fuzzy position?
+        if self.ref or self.ref_db:
+            return self
+        return FeatureLocation(
+            start=self._start._shift(offset),
+            end=self._end._shift(offset),
+            strand=self.strand,
+        )
+
+    def _flip(self, length):
+        """Return a copy of the location after the parent is reversed (PRIVATE).
+
+        Returns self when location is relative to an external reference.
+        """
+        if self.ref or self.ref_db:
+            return self
+        # Note this will flip the start and end too!
+        if self.strand == +1:
+            flip_strand = -1
+        elif self.strand == -1:
+            flip_strand = +1
+        else:
+            # 0 or None
+            flip_strand = self.strand
+        return FeatureLocation(
+            start=self._end._flip(length),
+            end=self._start._flip(length),
+            strand=flip_strand,
+        )
+
+    @property
+    def parts(self):
+        """Read only list of sections (always one, the FeatureLocation object).
+
+        This is a convenience property allowing you to write code handling
+        both simple FeatureLocation objects (with one part) and more complex
+        CompoundLocation objects (with multiple parts) interchangeably.
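+
+        An illustrative check that a simple location has exactly one part,
+        itself:
+
+        >>> from Bio.SeqFeature import FeatureLocation
+        >>> loc = FeatureLocation(5, 10)
+        >>> len(loc.parts)
+        1
+        >>> loc.parts[0] is loc
+        True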
+        """
+        return [self]
+
+    @property
+    def start(self):
+        """Start location - left most (minimum) value, regardless of strand.
+
+        Read only, returns an integer like position object, possibly a fuzzy
+        position.
+        """
+        return self._start
+
+    @property
+    def end(self):
+        """End location - right most (maximum) value, regardless of strand.
+
+        Read only, returns an integer like position object, possibly a fuzzy
+        position.
+        """
+        return self._end
+
+    @property
+    def nofuzzy_start(self):
+        """Start position (integer, approximated if fuzzy, read only) (OBSOLETE).
+
+        This is now an alias for int(feature.start), which should be
+        used in preference -- unless you are trying to support old
+        versions of Biopython.
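+
+        An illustrative check with an exact position:
+
+        >>> from Bio.SeqFeature import FeatureLocation
+        >>> loc = FeatureLocation(5, 10)
+        >>> loc.nofuzzy_start == int(loc.start) == 5
+        True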
+        """
+        try:
+            return int(self._start)
+        except TypeError:
+            if isinstance(self._start, UnknownPosition):
+                return None
+            raise
+
+    @property
+    def nofuzzy_end(self):
+        """End position (integer, approximated if fuzzy, read only) (OBSOLETE).
+
+        This is now an alias for int(feature.end), which should be
+        used in preference -- unless you are trying to support old
+        versions of Biopython.
+        """
+        try:
+            return int(self._end)
+        except TypeError:
+            if isinstance(self._end, UnknownPosition):
+                return None
+            raise
+
+    def extract(self, parent_sequence, references=None):
+        """Extract the sequence from supplied parent sequence using the FeatureLocation object.
+
+        The parent_sequence can be a Seq like object or a string, and will
+        generally return an object of the same type. The exception is a
+        MutableSeq parent sequence, which returns a Seq object.
+        If the location refers to other records, they must be supplied
+        in the optional dictionary references.
+
+        >>> from Bio.Seq import Seq
+        >>> from Bio.SeqFeature import FeatureLocation
+        >>> seq = Seq("MKQHKAMIVALIVICITAVVAAL")
+        >>> feature_loc = FeatureLocation(8, 15)
+        >>> feature_loc.extract(seq)
+        Seq('VALIVIC')
+
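+        If the location names another record via its ref, supply that record
+        in the optional references dictionary (an illustrative sketch using a
+        hypothetical identifier "other_seq"):
+
+        >>> loc = FeatureLocation(2, 8, ref="other_seq")
+        >>> loc.extract(seq, references={"other_seq": Seq("ACTGACTGACTG")})
+        Seq('TGACTG')
+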
+        """
+        if self.ref or self.ref_db:
+            if not references:
+                raise ValueError(
+                    f"Feature references another sequence ({self.ref}),"
+                    " references mandatory"
+                )
+            elif self.ref not in references:
+                # KeyError?
+                raise ValueError(
+                    f"Feature references another sequence ({self.ref}),"
+                    " not found in references"
+                )
+            parent_sequence = references[self.ref]
+            try:
+                # If was a SeqRecord, just take the sequence
+                # (should focus on the annotation of the feature)
+                parent_sequence = parent_sequence.seq
+            except AttributeError:
+                pass
+        if isinstance(parent_sequence, MutableSeq):
+            # This avoids complications with reverse complements
+            # (the MutableSeq reverse complement acts in situ)
+            parent_sequence = Seq(parent_sequence)
+        f_seq = parent_sequence[self.nofuzzy_start : self.nofuzzy_end]
+        if self.strand == -1:
+            try:
+                f_seq = f_seq.reverse_complement()
+            except AttributeError:
+                assert isinstance(f_seq, str)
+                f_seq = reverse_complement(f_seq)
+        return f_seq
+
+
+class CompoundLocation:
+    """For handling joins etc where a feature location has several parts."""
+
+    def __init__(self, parts, operator="join"):
+        """Initialize the class.
+
+        >>> from Bio.SeqFeature import FeatureLocation, CompoundLocation
+        >>> f1 = FeatureLocation(10, 40, strand=+1)
+        >>> f2 = FeatureLocation(50, 59, strand=+1)
+        >>> f = CompoundLocation([f1, f2])
+        >>> len(f) == len(f1) + len(f2) == 39 == len(list(f))
+        True
+        >>> print(f.operator)
+        join
+        >>> 5 in f
+        False
+        >>> 15 in f
+        True
+        >>> f.strand
+        1
+
+        Notice that the strand of the compound location is computed
+        automatically - in the case of mixed strands on the sub-locations
+        the overall strand is set to None.
+
+        >>> f = CompoundLocation([FeatureLocation(3, 6, strand=+1),
+        ...                       FeatureLocation(10, 13, strand=-1)])
+        >>> print(f.strand)
+        None
+        >>> len(f)
+        6
+        >>> list(f)
+        [3, 4, 5, 12, 11, 10]
+
+        The example above doing list(f) iterates over the coordinates within the
+        feature. This allows you to use max and min on the location, to find the
+        range covered:
+
+        >>> min(f)
+        3
+        >>> max(f)
+        12
+
+        More generally, you can use the compound location's start and end which
+        give the full span covered, 0 <= start <= end <= full sequence length.
+
+        >>> f.start == min(f)
+        True
+        >>> f.end == max(f) + 1
+        True
+
+        This is consistent with the behaviour of the simple FeatureLocation for
+        a single region, where again the 'start' and 'end' do not necessarily
+        give the biological start and end, but rather the 'minimal' and 'maximal'
+        coordinate boundaries.
+
+        Note that adding locations provides a more intuitive method of
+        construction:
+
+        >>> f = FeatureLocation(3, 6, strand=+1) + FeatureLocation(10, 13, strand=-1)
+        >>> len(f)
+        6
+        >>> list(f)
+        [3, 4, 5, 12, 11, 10]
+        """
+        self.operator = operator
+        self.parts = list(parts)
+        for loc in self.parts:
+            if not isinstance(loc, FeatureLocation):
+                raise ValueError(
+                    "CompoundLocation should be given a list of "
+                    "FeatureLocation objects, not %s" % loc.__class__
+                )
+        if len(parts) < 2:
+            raise ValueError(
+                "CompoundLocation should have at least 2 parts, not %r" % parts
+            )
+
+    def __str__(self):
+        """Return a representation of the CompoundLocation object (with python counting)."""
+        return "%s{%s}" % (self.operator, ", ".join(str(loc) for loc in self.parts))
+
+    def __repr__(self):
+        """Represent the CompoundLocation object as string for debugging."""
+        return "%s(%r, %r)" % (self.__class__.__name__, self.parts, self.operator)
+
+    def _get_strand(self):
+        """Get function for the strand property (PRIVATE)."""
+        # Historically a join on the reverse strand has been represented
+        # in Biopython with both the parent SeqFeature and its children
+        # (the exons for a CDS) all given a strand of -1.  Likewise, for
+        # a join feature on the forward strand they all have strand +1.
+        # However, we must also consider evil mixed strand examples like
+        # this, join(complement(69611..69724),139856..140087,140625..140650)
+        if len({loc.strand for loc in self.parts}) == 1:
+            return self.parts[0].strand
+        else:
+            return None  # i.e. mixed strands
+
+    def _set_strand(self, value):
+        """Set function for the strand property (PRIVATE)."""
+        # Should this be allowed/encouraged?
+        for loc in self.parts:
+            loc.strand = value
+
+    strand = property(
+        fget=_get_strand,
+        fset=_set_strand,
+        doc="""Overall strand of the compound location.
+
+        If all the parts have the same strand, that is returned. Otherwise
+        for mixed strands, this returns None.
+
+        >>> from Bio.SeqFeature import FeatureLocation, CompoundLocation
+        >>> f1 = FeatureLocation(15, 17, strand=1)
+        >>> f2 = FeatureLocation(20, 30, strand=-1)
+        >>> f = f1 + f2
+        >>> f1.strand
+        1
+        >>> f2.strand
+        -1
+        >>> f.strand
+        >>> f.strand is None
+        True
+
+        If you set the strand of a CompoundLocation, this is applied to
+        all the parts - use with caution:
+
+        >>> f.strand = 1
+        >>> f1.strand
+        1
+        >>> f2.strand
+        1
+        >>> f.strand
+        1
+
+        """,
+    )
+
+    def __add__(self, other):
+        """Combine locations, or shift the location by an integer offset.
+
+        >>> from Bio.SeqFeature import FeatureLocation
+        >>> f1 = FeatureLocation(15, 17) + FeatureLocation(20, 30)
+        >>> print(f1)
+        join{[15:17], [20:30]}
+
+        You can add another FeatureLocation:
+
+        >>> print(f1 + FeatureLocation(40, 50))
+        join{[15:17], [20:30], [40:50]}
+        >>> print(FeatureLocation(5, 10) + f1)
+        join{[5:10], [15:17], [20:30]}
+
+        You can also add another CompoundLocation:
+
+        >>> f2 = FeatureLocation(40, 50) + FeatureLocation(60, 70)
+        >>> print(f2)
+        join{[40:50], [60:70]}
+        >>> print(f1 + f2)
+        join{[15:17], [20:30], [40:50], [60:70]}
+
+        Also, as with the FeatureLocation, adding an integer shifts the
+        location's co-ordinates by that offset:
+
+        >>> print(f1 + 100)
+        join{[115:117], [120:130]}
+        >>> print(200 + f1)
+        join{[215:217], [220:230]}
+        >>> print(f1 + (-5))
+        join{[10:12], [15:25]}
+        """
+        if isinstance(other, FeatureLocation):
+            return CompoundLocation(self.parts + [other], self.operator)
+        elif isinstance(other, CompoundLocation):
+            if self.operator != other.operator:
+                # Handle join+order -> order as a special case?
+                raise ValueError(
+                    "Mixed operators %s and %s" % (self.operator, other.operator)
+                )
+            return CompoundLocation(self.parts + other.parts, self.operator)
+        elif isinstance(other, int):
+            return self._shift(other)
+        else:
+            raise NotImplementedError
+
+    def __radd__(self, other):
+        """Add a feature to the left."""
+        if isinstance(other, FeatureLocation):
+            return CompoundLocation([other] + self.parts, self.operator)
+        elif isinstance(other, int):
+            return self._shift(other)
+        else:
+            raise NotImplementedError
+
+    def __contains__(self, value):
+        """Check if an integer position is within the CompoundLocation object."""
+        for loc in self.parts:
+            if value in loc:
+                return True
+        return False
+
+    def __bool__(self):
+        """Return True regardless of the length of the feature.
+
+        This behaviour is for backwards compatibility, since until the
+        __len__ method was added, a CompoundLocation always evaluated as True.
+
+        Note that in comparison, Seq objects, strings, lists, etc, will all
+        evaluate to False if they have length zero.
+
+        WARNING: The CompoundLocation may in future evaluate to False when its
+        length is zero (in order to better match normal python behaviour)!
+        """
+        return True
+
+    def __len__(self):
+        """Return the length of the CompoundLocation object."""
+        return sum(len(loc) for loc in self.parts)
+
+    def __iter__(self):
+        """Iterate over the parent positions within the CompoundLocation object."""
+        for loc in self.parts:
+            yield from loc
+
+    def __eq__(self, other):
+        """Check if all parts of CompoundLocation are equal to all parts of other CompoundLocation."""
+        if not isinstance(other, CompoundLocation):
+            return False
+        if len(self.parts) != len(other.parts):
+            return False
+        if self.operator != other.operator:
+            return False
+        for self_part, other_part in zip(self.parts, other.parts):
+            if self_part != other_part:
+                return False
+        return True
+
+    def _shift(self, offset):
+        """Return a copy of the CompoundLocation shifted by an offset (PRIVATE)."""
+        return CompoundLocation(
+            [loc._shift(offset) for loc in self.parts], self.operator
+        )
+
+    def _flip(self, length):
+        """Return a copy of the locations after the parent is reversed (PRIVATE).
+
+        Note that the order of the parts is NOT reversed too. Consider a CDS
+        on the forward strand with exons small, medium and large (in length).
+        Once we change the frame of reference to the reverse complement strand,
+        the start codon is still part of the small exon, and the stop codon
+        still part of the large exon - so the part order remains the same!
+
+        Here is an artificial example, where the features map to the two upper
+        case regions and the lower case runs of n are not used:
+
+        >>> from Bio.Seq import Seq
+        >>> from Bio.SeqFeature import FeatureLocation
+        >>> dna = Seq("nnnnnAGCATCCTGCTGTACnnnnnnnnGAGAMTGCCATGCCCCTGGAGTGAnnnnn")
+        >>> small = FeatureLocation(5, 20, strand=1)
+        >>> large = FeatureLocation(28, 52, strand=1)
+        >>> location = small + large
+        >>> print(small)
+        [5:20](+)
+        >>> print(large)
+        [28:52](+)
+        >>> print(location)
+        join{[5:20](+), [28:52](+)}
+        >>> for part in location.parts:
+        ...     print(len(part))
+        ...
+        15
+        24
+
+        As you can see, this is a silly example where each "exon" is a word:
+
+        >>> print(small.extract(dna).translate())
+        SILLY
+        >>> print(large.extract(dna).translate())
+        EXAMPLE*
+        >>> print(location.extract(dna).translate())
+        SILLYEXAMPLE*
+        >>> for part in location.parts:
+        ...     print(part.extract(dna).translate())
+        ...
+        SILLY
+        EXAMPLE*
+
+        Now, let's look at this from the reverse strand frame of reference:
+
+        >>> flipped_dna = dna.reverse_complement()
+        >>> flipped_location = location._flip(len(dna))
+        >>> print(flipped_location.extract(flipped_dna).translate())
+        SILLYEXAMPLE*
+        >>> for part in flipped_location.parts:
+        ...     print(part.extract(flipped_dna).translate())
+        ...
+        SILLY
+        EXAMPLE*
+
+        The key point here is the first part of the CompoundFeature is still the
+        small exon, while the second part is still the large exon:
+
+        >>> for part in flipped_location.parts:
+        ...     print(len(part))
+        ...
+        15
+        24
+        >>> print(flipped_location)
+        join{[37:52](-), [5:29](-)}
+
+        Notice the parts are not reversed. However, there was a bug here in older
+        versions of Biopython which would have given join{[5:29](-), [37:52](-)}
+        and the translation would have wrongly been "EXAMPLE*SILLY" instead.
+
+        """
+        return CompoundLocation(
+            [loc._flip(length) for loc in self.parts], self.operator
+        )
+
+    @property
+    def start(self):
+        """Start location - left most (minimum) value, regardless of strand.
+
+        Read only, returns an integer like position object, possibly a fuzzy
+        position.
+
+        For the special case of a CompoundLocation wrapping the origin of a
+        circular genome, this will return zero.
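+
+        For example, here is an illustrative sketch of such an
+        origin-wrapping feature on a 100bp circular genome:
+
+        >>> from Bio.SeqFeature import FeatureLocation
+        >>> before_origin = FeatureLocation(95, 100, strand=1)
+        >>> after_origin = FeatureLocation(0, 5, strand=1)
+        >>> loc = before_origin + after_origin
+        >>> loc.start
+        ExactPosition(0)
+        >>> loc.end
+        ExactPosition(100)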
+        """
+        return min(loc.start for loc in self.parts)
+
+    @property
+    def end(self):
+        """End location - right most (maximum) value, regardless of strand.
+
+        Read only, returns an integer like position object, possibly a fuzzy
+        position.
+
+        For the special case of a CompoundLocation wrapping the origin of
+        a circular genome this will match the genome length (minus one
+        given how Python counts from zero).
+        """
+        return max(loc.end for loc in self.parts)
+
+    @property
+    def nofuzzy_start(self):
+        """Start position (integer, approximated if fuzzy, read only) (OBSOLETE).
+
+        This is an alias for int(feature.start), which should be used in
+        preference -- unless you are trying to support old versions of
+        Biopython.
+        """
+        try:
+            return int(self.start)
+        except TypeError:
+            if isinstance(self.start, UnknownPosition):
+                return None
+            raise
+
+    @property
+    def nofuzzy_end(self):
+        """End position (integer, approximated if fuzzy, read only) (OBSOLETE).
+
+        This is an alias for int(feature.end), which should be used in
+        preference -- unless you are trying to support old versions of
+        Biopython.
+        """
+        try:
+            return int(self.end)
+        except TypeError:
+            if isinstance(self.end, UnknownPosition):
+                return None
+            raise
+
+    @property
+    def ref(self):
+        """Not present in CompoundLocation, dummy method for API compatibility."""
+        return None
+
+    @property
+    def ref_db(self):
+        """Not present in CompoundLocation, dummy method for API compatibility."""
+        return None
+
+    def extract(self, parent_sequence, references=None):
+        """Extract the sequence from supplied parent sequence using the CompoundLocation object.
+
+        The parent_sequence can be a Seq like object or a string, and will
+        generally return an object of the same type. The exception to this is
+        a MutableSeq as the parent sequence will return a Seq object.
+        If the location refers to other records, they must be supplied
+        in the optional dictionary references.
+
+        >>> from Bio.Seq import Seq
+        >>> from Bio.SeqFeature import FeatureLocation, CompoundLocation
+        >>> seq = Seq("MKQHKAMIVALIVICITAVVAAL")
+        >>> fl1 = FeatureLocation(2, 8)
+        >>> fl2 = FeatureLocation(10, 15)
+        >>> fl3 = CompoundLocation([fl1,fl2])
+        >>> fl3.extract(seq)
+        Seq('QHKAMILIVIC')
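+
+        The parts are extracted in the order given, so listing them the
+        other way round changes the result:
+
+        >>> CompoundLocation([fl2, fl1]).extract(seq)
+        Seq('LIVICQHKAMI')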
+
+        """
+        # This copes with mixed strand features & all on reverse:
+        parts = [
+            loc.extract(parent_sequence, references=references) for loc in self.parts
+        ]
+        f_seq = functools.reduce(lambda x, y: x + y, parts)
+        return f_seq
+
+
+class AbstractPosition:
+    """Abstract base class representing a position."""
+
+    def __repr__(self):
+        """Represent the AbstractPosition object as a string for debugging."""
+        return "%s(...)" % (self.__class__.__name__)
+
+
+class ExactPosition(int, AbstractPosition):
+    """Specify the specific position of a boundary.
+
+    Arguments:
+     - position - The position of the boundary.
+     - extension - An optional argument which must be zero since we don't
+       have an extension. The argument is provided so that the same number
+       of arguments can be passed to all position types.
+
+    In this case, there is no fuzziness associated with the position.
+
+    >>> p = ExactPosition(5)
+    >>> p
+    ExactPosition(5)
+    >>> print(p)
+    5
+
+    >>> isinstance(p, AbstractPosition)
+    True
+    >>> isinstance(p, int)
+    True
+
+    Integer comparisons and operations should work as expected:
+
+    >>> p == 5
+    True
+    >>> p < 6
+    True
+    >>> p <= 5
+    True
+    >>> p + 10
+    15
+
+    """
+
+    def __new__(cls, position, extension=0):
+        """Create an ExactPosition object."""
+        if extension != 0:
+            raise AttributeError(
+                "Non-zero extension %s for exact position." % extension
+            )
+        return int.__new__(cls, position)
+
+    # Must define this on Python 3.8 onwards because we redefine __repr__
+    def __str__(self):
+        """Return a representation of the ExactPosition object (with python counting)."""
+        return str(int(self))
+
+    def __repr__(self):
+        """Represent the ExactPosition object as a string for debugging."""
+        return "%s(%i)" % (self.__class__.__name__, int(self))
+
+    @property
+    def position(self):
+        """Legacy attribute to get position as integer (OBSOLETE)."""
+        return int(self)
+
+    @property
+    def extension(self):
+        """Not present in this object, return zero (OBSOLETE)."""
+        return 0
+
+    def _shift(self, offset):
+        """Return a copy of the position object with its location shifted (PRIVATE)."""
+        # By default preserve any subclass
+        return self.__class__(int(self) + offset)
+
+    def _flip(self, length):
+        """Return a copy of the location after the parent is reversed (PRIVATE)."""
+        # By default preserve any subclass
+        return self.__class__(length - int(self))
+
+
+class UncertainPosition(ExactPosition):
+    """Specify a specific position which is uncertain.
+
+    This is used in UniProt, e.g. ?222 for uncertain position 222, or in the
+    XML format explicitly marked as uncertain. Does not apply to GenBank/EMBL.
+    """
+
+    pass
+
+
+class UnknownPosition(AbstractPosition):
+    """Specify a specific position which is unknown (has no position).
+
+    This is used in UniProt, e.g. ? or in the XML as unknown.
+    """
+
+    def __repr__(self):
+        """Represent the UnknownPosition object as a string for debugging."""
+        return "%s()" % self.__class__.__name__
+
+    def __hash__(self):
+        """Return the hash value of the UnknownPosition object."""
+        return hash(None)
+
+    @property
+    def position(self):
+        """Legacy attribute to get location (None) (OBSOLETE)."""
+        return None
+
+    @property
+    def extension(self):  # noqa: D402
+        """Legacy attribute to get extension (zero) as integer (OBSOLETE)."""  # noqa: D402
+        return 0
+
+    def _shift(self, offset):
+        """Return a copy of the position object with its location shifted (PRIVATE)."""
+        return self
+
+    def _flip(self, length):
+        """Return a copy of the location after the parent is reversed (PRIVATE)."""
+        return self
+
+
+class WithinPosition(int, AbstractPosition):
+    """Specify the position of a boundary within some coordinates.
+
+    Arguments:
+     - position - The default integer position
+     - left - The start (left) position of the boundary
+     - right - The end (right) position of the boundary
+
+    This allows dealing with a location like ((1.4)..100). This
+    indicates that the start of the sequence is somewhere between 1
+    and 4. Since this is a start coordinate, it should act like
+    it is at position 1 (or in Python counting, 0).
+
+    >>> p = WithinPosition(10, 10, 13)
+    >>> p
+    WithinPosition(10, left=10, right=13)
+    >>> print(p)
+    (10.13)
+    >>> int(p)
+    10
+
+    Basic integer comparisons and operations should work as though
+    this were a plain integer:
+
+    >>> p == 10
+    True
+    >>> p in [9, 10, 11]
+    True
+    >>> p < 11
+    True
+    >>> p + 10
+    20
+
+    >>> isinstance(p, WithinPosition)
+    True
+    >>> isinstance(p, AbstractPosition)
+    True
+    >>> isinstance(p, int)
+    True
+
+    Note this also applies for comparison to other position objects,
+    where again the integer behaviour is used:
+
+    >>> p == 10
+    True
+    >>> p == ExactPosition(10)
+    True
+    >>> p == BeforePosition(10)
+    True
+    >>> p == AfterPosition(10)
+    True
+
+    If this were an end point, you would want the position to be 13:
+
+    >>> p2 = WithinPosition(13, 10, 13)
+    >>> p2
+    WithinPosition(13, left=10, right=13)
+    >>> print(p2)
+    (10.13)
+    >>> int(p2)
+    13
+    >>> p2 == 13
+    True
+    >>> p2 == ExactPosition(13)
+    True
+
+    The old legacy properties of position and extension give the
+    starting/lower/left position as an integer, and the distance
+    to the ending/higher/right position as an integer. Note that
+    the position object will act like either the left or the right
+    end-point depending on how it was created:
+
+    >>> p.position == p2.position == 10
+    True
+    >>> p.extension == p2.extension == 3
+    True
+    >>> int(p) == int(p2)
+    False
+    >>> p == 10
+    True
+    >>> p2 == 13
+    True
+
+    """
+
+    def __new__(cls, position, left, right):
+        """Create a WithinPosition object."""
+        if not (position == left or position == right):
+            raise RuntimeError(
+                "WithinPosition: %r should match left %r or "
+                "right %r" % (position, left, right)
+            )
+        obj = int.__new__(cls, position)
+        obj._left = left
+        obj._right = right
+        return obj
+
+    def __getnewargs__(self):
+        """Return the arguments accepted by __new__.
+
+        Necessary to allow pickling and unpickling of class instances.
+        """
+        return (int(self), self._left, self._right)
+
+    def __repr__(self):
+        """Represent the WithinPosition object as a string for debugging."""
+        return "%s(%i, left=%i, right=%i)" % (
+            self.__class__.__name__,
+            int(self),
+            self._left,
+            self._right,
+        )
+
+    def __str__(self):
+        """Return a representation of the WithinPosition object (with python counting)."""
+        return "(%s.%s)" % (self._left, self._right)
+
+    @property
+    def position(self):
+        """Legacy attribute to get (left) position as integer (OBSOLETE)."""
+        return self._left
+
+    @property
+    def extension(self):  # noqa: D402
+        """Legacy attribute to get extension (from left to right) as an integer (OBSOLETE)."""  # noqa: D402
+        return self._right - self._left
+
+    def _shift(self, offset):
+        """Return a copy of the position object with its location shifted (PRIVATE)."""
+        return self.__class__(
+            int(self) + offset, self._left + offset, self._right + offset
+        )
+
+    def _flip(self, length):
+        """Return a copy of the location after the parent is reversed (PRIVATE)."""
+        return self.__class__(
+            length - int(self), length - self._right, length - self._left
+        )
+
+
+class BetweenPosition(int, AbstractPosition):
+    """Specify the position of a boundary between two coordinates (OBSOLETE?).
+
+    Arguments:
+     - position - The default integer position
+     - left - The start (left) position of the boundary
+     - right - The end (right) position of the boundary
+
+    This allows dealing with a position like 123^456. This
+    indicates that the start of the sequence is somewhere between
+    123 and 456. It is up to the parser to set the position argument
+    to either boundary point (depending on if this is being used as
+    a start or end of the feature). For example as a feature end:
+
+    >>> p = BetweenPosition(456, 123, 456)
+    >>> p
+    BetweenPosition(456, left=123, right=456)
+    >>> print(p)
+    (123^456)
+    >>> int(p)
+    456
+
+    Integer equality and comparison use the given position,
+
+    >>> p == 456
+    True
+    >>> p in [455, 456, 457]
+    True
+    >>> p > 300
+    True
+
+    The old legacy properties of position and extension give the
+    starting/lower/left position as an integer, and the distance
+    to the ending/higher/right position as an integer. Note that
+    the position object will act like either the left or the right
+    end-point depending on how it was created:
+
+    >>> p2 = BetweenPosition(123, left=123, right=456)
+    >>> p.position == p2.position == 123
+    True
+    >>> p.extension
+    333
+    >>> p2.extension
+    333
+    >>> p.extension == p2.extension == 333
+    True
+    >>> int(p) == int(p2)
+    False
+    >>> p == 456
+    True
+    >>> p2 == 123
+    True
+
+    Note this potentially surprising behaviour:
+
+    >>> BetweenPosition(123, left=123, right=456) == ExactPosition(123)
+    True
+    >>> BetweenPosition(123, left=123, right=456) == BeforePosition(123)
+    True
+    >>> BetweenPosition(123, left=123, right=456) == AfterPosition(123)
+    True
+
+    i.e. For equality (and sorting) the position objects behave like
+    integers.
+
+    """
+
+    def __new__(cls, position, left, right):
+        """Create a new instance in BetweenPosition object."""
+        assert position == left or position == right
+        obj = int.__new__(cls, position)
+        obj._left = left
+        obj._right = right
+        return obj
+
+    def __getnewargs__(self):
+        """Return the arguments accepted by __new__.
+
+        Necessary to allow pickling and unpickling of class instances.
+        """
+        return (int(self), self._left, self._right)
+
+    def __repr__(self):
+        """Represent the BetweenPosition object as a string for debugging."""
+        return "%s(%i, left=%i, right=%i)" % (
+            self.__class__.__name__,
+            int(self),
+            self._left,
+            self._right,
+        )
+
+    def __str__(self):
+        """Return a representation of the BetweenPosition object (with python counting)."""
+        return "(%s^%s)" % (self._left, self._right)
+
+    @property
+    def position(self):
+        """Legacy attribute to get (left) position as integer (OBSOLETE)."""
+        return self._left
+
+    @property
+    def extension(self):  # noqa: D402
+        """Legacy attribute to get extension (from left to right) as an integer (OBSOLETE)."""  # noqa: D402
+        return self._right - self._left
+
+    def _shift(self, offset):
+        """Return a copy of the position object with its location shifted (PRIVATE)."""
+        return self.__class__(
+            int(self) + offset, self._left + offset, self._right + offset
+        )
+
+    def _flip(self, length):
+        """Return a copy of the location after the parent is reversed (PRIVATE)."""
+        return self.__class__(
+            length - int(self), length - self._right, length - self._left
+        )
+
+
+class BeforePosition(int, AbstractPosition):
+    """Specify a position where the actual location occurs before it.
+
+    Arguments:
+     - position - The upper boundary of where the location can occur.
+     - extension - An optional argument which must be zero since we don't
+       have an extension. The argument is provided so that the same number
+       of arguments can be passed to all position types.
+
+    This is used to specify positions like (<10..100) where the location
+    occurs somewhere before position 10.
+
+    >>> p = BeforePosition(5)
+    >>> p
+    BeforePosition(5)
+    >>> print(p)
+    <5
+    >>> int(p)
+    5
+    >>> p + 10
+    15
+
+    Note this potentially surprising behaviour:
+
+    >>> p == ExactPosition(5)
+    True
+    >>> p == AfterPosition(5)
+    True
+
+    Just remember that for equality and sorting the position objects act
+    like integers.
+    """
+
+    # Subclasses int so can't use __init__
+    def __new__(cls, position, extension=0):
+        """Create a new instance in BeforePosition object."""
+        if extension != 0:
+            raise AttributeError(
+                "Non-zero extension %s for exact position." % extension
+            )
+        return int.__new__(cls, position)
+
+    @property
+    def position(self):
+        """Legacy attribute to get position as integer (OBSOLETE)."""
+        return int(self)
+
+    @property
+    def extension(self):  # noqa: D402
+        """Legacy attribute to get extension (zero) as integer (OBSOLETE)."""  # noqa: D402
+        return 0
+
+    def __repr__(self):
+        """Represent the location as a string for debugging."""
+        return "%s(%i)" % (self.__class__.__name__, int(self))
+
+    def __str__(self):
+        """Return a representation of the BeforePosition object (with python counting)."""
+        return "<%s" % self.position
+
+    def _shift(self, offset):
+        """Return a copy of the position object with its location shifted (PRIVATE)."""
+        return self.__class__(int(self) + offset)
+
+    def _flip(self, length):
+        """Return a copy of the location after the parent is reversed (PRIVATE)."""
+        return AfterPosition(length - int(self))
+
+
+class AfterPosition(int, AbstractPosition):
+    """Specify a position where the actual location is found after it.
+
+    Arguments:
+     - position - The lower boundary of where the location can occur.
+     - extension - An optional argument which must be zero since we don't
+       have an extension. The argument is provided so that the same number
+       of arguments can be passed to all position types.
+
+    This is used to specify positions like (>10..100) where the location
+    occurs somewhere after position 10.
+
+    >>> p = AfterPosition(7)
+    >>> p
+    AfterPosition(7)
+    >>> print(p)
+    >7
+    >>> int(p)
+    7
+    >>> p + 10
+    17
+
+    >>> isinstance(p, AfterPosition)
+    True
+    >>> isinstance(p, AbstractPosition)
+    True
+    >>> isinstance(p, int)
+    True
+
+    Note this potentially surprising behaviour:
+
+    >>> p == ExactPosition(7)
+    True
+    >>> p == BeforePosition(7)
+    True
+
+    Just remember that for equality and sorting the position objects act
+    like integers.
+    """
+
+    # Subclasses int so can't use __init__
+    def __new__(cls, position, extension=0):
+        """Create a new instance of the AfterPosition object."""
+        if extension != 0:
+            raise AttributeError(
+                "Non-zero extension %s for exact position." % extension
+            )
+        return int.__new__(cls, position)
+
+    @property
+    def position(self):
+        """Legacy attribute to get position as integer (OBSOLETE)."""
+        return int(self)
+
+    @property
+    def extension(self):  # noqa: D402
+        """Legacy attribute to get extension (zero) as integer (OBSOLETE)."""  # noqa: D402
+        return 0
+
+    def __repr__(self):
+        """Represent the location as a string for debugging."""
+        return "%s(%i)" % (self.__class__.__name__, int(self))
+
+    def __str__(self):
+        """Return a representation of the AfterPosition object (with python counting)."""
+        return ">%s" % self.position
+
+    def _shift(self, offset):
+        """Return a copy of the position object with its location shifted (PRIVATE)."""
+        return self.__class__(int(self) + offset)
+
+    def _flip(self, length):
+        """Return a copy of the location after the parent is reversed (PRIVATE)."""
+        return BeforePosition(length - int(self))
+
+
+class OneOfPosition(int, AbstractPosition):
+    """Specify a position where the location can be multiple positions.
+
+    This models the GenBank 'one-of(1888,1901)' function, and tries
+    to make this fit within the Biopython Position models. If this was
+    a start position it should act like 1888, but as an end position 1901.
+
+    >>> p = OneOfPosition(1888, [ExactPosition(1888), ExactPosition(1901)])
+    >>> p
+    OneOfPosition(1888, choices=[ExactPosition(1888), ExactPosition(1901)])
+    >>> int(p)
+    1888
+
+    Integer comparisons and operators act like using int(p),
+
+    >>> p == 1888
+    True
+    >>> p <= 1888
+    True
+    >>> p > 1888
+    False
+    >>> p + 100
+    1988
+
+    >>> isinstance(p, OneOfPosition)
+    True
+    >>> isinstance(p, AbstractPosition)
+    True
+    >>> isinstance(p, int)
+    True
+
+    The old legacy properties of position and extension give the
+    starting/lowest/left-most position as an integer, and the
+    distance to the ending/highest/right-most position as an integer.
+    Note that the position object will act like one of the list of
+    possible locations depending on how it was created:
+
+    >>> p2 = OneOfPosition(1901, [ExactPosition(1888), ExactPosition(1901)])
+    >>> p.position == p2.position == 1888
+    True
+    >>> p.extension == p2.extension == 13
+    True
+    >>> int(p) == int(p2)
+    False
+    >>> p == 1888
+    True
+    >>> p2 == 1901
+    True
+
+    """
+
+    def __new__(cls, position, choices):
+        """Initialize with a set of possible positions.
+
+        position_list is a list of AbstractPosition derived objects,
+        specifying possible locations.
+
+        position is an integer specifying the default behaviour.
+        """
+        if position not in choices:
+            raise ValueError(
+                "OneOfPosition: %r should match one of %r" % (position, choices)
+            )
+        obj = int.__new__(cls, position)
+        obj.position_choices = choices
+        return obj
+
+    def __getnewargs__(self):
+        """Return the arguments accepted by __new__.
+
+        Necessary to allow pickling and unpickling of class instances.
+        """
+        return (int(self), self.position_choices)
+
+    @property
+    def position(self):
+        """Legacy attribute to get (left) position as integer (OBSOLETE)."""
+        return min(int(pos) for pos in self.position_choices)
+
+    @property
+    def extension(self):
+        """Legacy attribute to get extension as integer (OBSOLETE)."""
+        positions = [int(pos) for pos in self.position_choices]
+        return max(positions) - min(positions)
+
+    def __repr__(self):
+        """Represent the OneOfPosition object as a string for debugging."""
+        return "%s(%i, choices=%r)" % (
+            self.__class__.__name__,
+            int(self),
+            self.position_choices,
+        )
+
+    def __str__(self):
+        """Return a representation of the OneOfPosition object (with python counting)."""
+        out = "one-of("
+        for position in self.position_choices:
+            out += "%s," % position
+        # replace the last comma with the closing parenthesis
+        return out[:-1] + ")"
+
+    def _shift(self, offset):
+        """Return a copy of the position object with its location shifted (PRIVATE)."""
+        return self.__class__(
+            int(self) + offset, [p._shift(offset) for p in self.position_choices]
+        )
+
+    def _flip(self, length):
+        """Return a copy of the location after the parent is reversed (PRIVATE)."""
+        return self.__class__(
+            length - int(self), [p._flip(length) for p in self.position_choices[::-1]]
+        )
+
+
+class PositionGap:
+    """Simple class to hold information about a gap between positions."""
+
+    def __init__(self, gap_size):
+        """Intialize with a position object containing the gap information."""
+        self.gap_size = gap_size
+
+    def __repr__(self):
+        """Represent the position gap as a string for debugging."""
+        return "%s(%r)" % (self.__class__.__name__, self.gap_size)
+
+    def __str__(self):
+        """Return a representation of the PositionGap object (with python counting)."""
+        return "gap(%s)" % self.gap_size
+
+
+if __name__ == "__main__":
+    from Bio._utils import run_doctest
+
+    run_doctest()
diff --git a/code/lib/Bio/SeqIO/AbiIO.py b/code/lib/Bio/SeqIO/AbiIO.py
new file mode 100644
index 0000000..31ca8cc
--- /dev/null
+++ b/code/lib/Bio/SeqIO/AbiIO.py
@@ -0,0 +1,602 @@
+# Copyright 2011 by Wibowo Arindrarto (w.arindrarto@gmail.com)
+# Revisions copyright 2011-2016 by Peter Cock.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Bio.SeqIO parser for the ABI format.
+
+ABI is the format used by Applied Biosystem's sequencing machines to store
+sequencing results.
+
+For more details on the format specification, visit:
+http://www6.appliedbiosystems.com/support/software_community/ABIF_File_Format.pdf
+
+"""
+import datetime
+import struct
+import sys
+
+from os.path import basename
+
+from Bio.Seq import Seq
+from Bio.SeqRecord import SeqRecord
+
+from .Interfaces import SequenceIterator
+
+
+# dictionary for determining which tags go into the SeqRecord annotation
+# each key is tag_name + tag_number
+# if a tag entry needs to be added, just add its key here, with the
+# corresponding annotations dictionary key as the value
+_EXTRACT = {
+    "TUBE1": "sample_well",
+    "DySN1": "dye",
+    "GTyp1": "polymer",
+    "MODL1": "machine_model",
+}
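+# For example, the value parsed for the raw "TUBE1" tag ends up as
+# record.annotations["sample_well"] on the resulting SeqRecord.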
+
+
+# Complete data structure representing 98% of the API. The general section
+# represents the part of the API that's common to ALL instruments, whereas the
+# instrument specific sections are labelled as they are in the ABIF spec
+#
+# Keys don't seem to clash from machine to machine, so when we parse, we look
+# for ANY key, and store that in the raw ABIF data structure attached to the
+# annotations, with the assumption that anyone parsing the data can look up
+# the spec themselves
+#
+# Key definitions are retained in case end users want "nice" labels pre-made
+# for them for all of the available fields.
+_INSTRUMENT_SPECIFIC_TAGS = {}
+
+# fmt: off
+_INSTRUMENT_SPECIFIC_TAGS["general"] = {
+    "APFN2": "Sequencing Analysis parameters file name",
+    "APXV1": "Analysis Protocol XML schema version",
+    "APrN1": "Analysis Protocol settings name",
+    "APrV1": "Analysis Protocol settings version",
+    "APrX1": "Analysis Protocol XML string",
+    "CMNT1": "Sample Comment",
+    "CTID1": "Container Identifier, a.k.a. plate barcode",
+    "CTNM1": "Container name, usually identical to CTID, but not necessarily so",
+    "CTTL1": "Comment Title",
+    "CpEP1": "Capillary type electrophoresis. 1 for a capillary based machine. 0 for a slab gel based machine.",
+    "DATA1": "Channel 1 raw data",
+    "DATA2": "Channel 2 raw data",
+    "DATA3": "Channel 3 raw data",
+    "DATA4": "Channel 4 raw data",
+    "DATA5": "Short Array holding measured volts/10 (EP voltage) during run",
+    "DATA6": "Short Array holding measured milliAmps trace (EP current) during run",
+    "DATA7": "Short Array holding measured milliWatts trace (Laser EP Power) during run",
+    "DATA8": "Short Array holding measured oven Temperature (polymer temperature) trace during run",
+    "DATA9": "Channel 9 processed data",
+    "DATA10": "Channel 10 processed data",
+    "DATA11": "Channel 11 processed data",
+    "DATA12": "Channel 12 processed data",
+    # Prism 3100/3100-Avant may provide DATA105
+    #          3130/3130-XL may provide DATA105
+    # 3530/3530-XL may provide DATA105-199, 9-12, 205-299
+    "DSam1": "Downsampling factor",
+    "DySN1": "Dye set name",
+    "Dye#1": "Number of dyes",
+    "DyeN1": "Dye 1 name",
+    "DyeN2": "Dye 2 name",
+    "DyeN3": "Dye 3 name",
+    "DyeN4": "Dye 4 name",
+    "DyeW1": "Dye 1 wavelength",
+    "DyeW2": "Dye 2 wavelength",
+    "DyeW3": "Dye 3 wavelength",
+    "DyeW4": "Dye 4 wavelength",
+    # 'DyeN5-N': 'Dye 5-N Name',
+    # 'DyeW5-N': 'Dye 5-N Wavelength',
+    "EPVt1": "Electrophoresis voltage setting (volts)",
+    "EVNT1": "Start Run event",
+    "EVNT2": "Stop Run event",
+    "EVNT3": "Start Collection event",
+    "EVNT4": "Stop Collection event",
+    "FWO_1": 'Base Order. Sequencing Analysis Filter wheel order. Fixed for 3500 at "GATC"',
+    "GTyp1": "Gel or polymer Type",
+    "InSc1": "Injection time (seconds)",
+    "InVt1": "Injection voltage (volts)",
+    "LANE1": "Lane/Capillary",
+    "LIMS1": "Sample tracking ID",
+    "LNTD1": "Length to detector",
+    "LsrP1": "Laser Power setting (micro Watts)",
+    "MCHN1": "Instrument name and serial number",
+    "MODF1": "Data collection module file",
+    "MODL1": "Model number",
+    "NAVG1": "Pixels averaged per lane",
+    "NLNE1": "Number of capillaries",
+    "OfSc1": "List of scans that are marked off scale in Collection. (optional)",
+    # OvrI and OrvV are listed as "1-N", and "One for each dye (unanalyzed
+    # and/or analyzed data)"
+    "OvrI1": "List of scan number indexes that have values greater than 32767 but did not "
+             "saturate the camera. In Genemapper samples, this can have indexes with "
+             "values greater than 32000. In sequencing samples, this cannot have "
+             "indexes with values greater than 32000.",
+    "OvrI2": "List of scan number indexes that have values greater than 32767 but did not "
+             "saturate the camera. In Genemapper samples, this can have indexes with "
+             "values greater than 32000. In sequencing samples, this cannot have "
+             "indexes with values greater than 32000.",
+    "OvrI3": "List of scan number indexes that have values greater than 32767 but did not "
+             "saturate the camera. In Genemapper samples, this can have indexes with "
+             "values greater than 32000. In sequencing samples, this cannot have "
+             "indexes with values greater than 32000.",
+    "OvrI4": "List of scan number indexes that have values greater than 32767 but did not "
+             "saturate the camera. In Genemapper samples, this can have indexes with "
+             "values greater than 32000. In sequencing samples, this cannot have "
+             "indexes with values greater than 32000.",
+    "OvrV1": "List of color data values found at the locations listed in the OvrI tag. "
+             "There must be exactly as many numbers in this array as in the OvrI array.",
+    "OvrV2": "List of color data values found at the locations listed in the OvrI tag. "
+             "There must be exactly as many numbers in this array as in the OvrI array.",
+    "OvrV3": "List of color data values found at the locations listed in the OvrI tag. "
+             "There must be exactly as many numbers in this array as in the OvrI array.",
+    "OvrV4": "List of color data values found at the locations listed in the OvrI tag. "
+             "There must be exactly as many numbers in this array as in the OvrI array.",
+    "PDMF1": "Sequencing Analysis Mobility file name chosen in collection",
+    "RMXV1": "Run Module XML schema version",
+    "RMdN1": "Run Module name (same as MODF)",
+    "RMdX1": "Run Module XML string",
+    "RPrN1": "Run Protocol name",
+    "RPrV1": "Run Protocol version",
+    "RUND1": "Run Started Date",
+    "RUND2": "Run Stopped Date",
+    "RUND3": "Data Collection Started Date",
+    "RUND4": "Data Collection Stopped date",
+    "RUNT1": "Run Started Time",
+    "RUNT2": "Run Stopped Time",
+    "RUNT3": "Data Collection Started Time",
+    "RUNT4": "Data Collection Stopped Time",
+    "Rate1": "Scanning Rate. Milliseconds per frame.",
+    "RunN1": "Run Name",
+    "SCAN1": "Number of scans",
+    "SMED1": "Polymer lot expiration date",
+    "SMLt1": "Polymer lot number",
+    "SMPL1": "Sample name",
+    "SVER1": "Data collection software version",
+    "SVER3": "Data collection firmware version",
+    "Satd1": "Array of longs representing the scan numbers of data points, which are flagged as saturated by data collection (optional)",
+    "Scal1": "Rescaling divisor for color data",
+    "Scan1": "Number of scans (legacy - use SCAN)",
+    "TUBE1": "Well ID",
+    "Tmpr1": "Run temperature setting",
+    "User1": "Name of user who created the plate (optional)",
+}
+
+#  No instrument specific tags
+# _INSTRUMENT_SPECIFIC_TAGS['abi_prism_3100/3100-Avant'] = {
+# }
+
+_INSTRUMENT_SPECIFIC_TAGS["abi_3130/3130xl"] = {
+    "CTOw1": "Container owner",
+    "HCFG1": "Instrument Class",
+    "HCFG2": "Instrument Family",
+    "HCFG3": "Official Instrument Name",
+    "HCFG4": "Instrument Parameters",
+    "RMdVa1": "Run Module version",
+}
+
+_INSTRUMENT_SPECIFIC_TAGS["abi_3530/3530xl"] = {
+    "AAct1": "Primary Analysis Audit Active indication. True if system auditing was enabled during the last write of this file, "
+             "false if system auditing was disabled.",
+    "ABED1": "Anode buffer expiration date using ISO 8601 format using the patterns YYYY-MM-DDTHH:MM:SS.ss+/-HH:MM. Hundredths of a second are optional.",
+    "ABID1": "Anode buffer tray first installed date",
+    "ABLt1": "Anode buffer lot number",
+    "ABRn1": "Number of runs (injections) processed with the current Anode Buffer (runs allowed - runs remaining)",
+    "ABTp1": "Anode buffer type",
+    "AEPt1": "Analysis Ending scan number for basecalling on initial analysis",
+    "AEPt2": "Analysis Ending scan number for basecalling on last analysis",
+    "APCN1": "Amplicon name",
+    "ARTN1": "Analysis Return code. Produced only by 5 Prime basecaller 1.0b3",
+    "ASPF1": "Flag to indicate whether adaptive processing worked or not",
+    "ASPt1": "Analysis Starting scan number for first analysis",
+    "ASPt2": "Analysis Starting scan number for last analysis",
+    "AUDT2": "Audit log used across 3500 software (optional)",
+    "AVld1": "Assay validation flag (true or false)",
+    "AmbT1": "Record of ambient temperature readings",
+    "AsyC1": "The assay contents (xml format)",
+    "AsyN1": "The assay name",
+    "AsyV1": "The assay version",
+    "B1Pt1": "Reference scan number for mobility and spacing curves for first analysis",
+    "B1Pt2": "Reference scan number for mobility and spacing curves for last analysis",
+    "BCTS1": "Basecaller timestamp. Time of completion of most recent analysis",
+    "BcRn1": "Basecalling qc code",
+    "BcRs1": "Basecalling warnings, a concatenated comma separated string",
+    "BcRs2": "Basecalling errors, a concatenated comma separated string",
+    "CAED1": "Capillary array expiration",
+    "CALt1": "Capillary array lot number",
+    "CARn1": "Number of injections processed (including the one of which this sample was a part) through the capillary array",
+    "CASN1": "Capillary array serial number",
+    "CBED1": "Cathode buffer expiration date",
+    "CBID1": "Cathode buffer tray first installed date",
+    "CBLt1": "Cathode buffer lot number",
+    "CBRn1": "Number of runs (injections) processed with the current Cathode Buffer (runs allowed - runs remaining)",
+    "CBTp1": "Cathode buffer type",
+    "CLRG1": "Start of the clear range (inclusive).",
+    "CLRG2": "Clear range length",
+    "CRLn1": "Contiguous read length",
+    "CRLn2": 'One of "Pass", "Fail", or "Check"',
+    "CTOw1": "The name entered as the Owner of a plate, in the plate editor",
+    "CkSm1": "File checksum",
+    "DCEv1": "A list of door-close events, separated by semicolon. Door open events are generally paired with door close events.",
+    "DCHT1": "Reserved for backward compatibility. The detection cell heater temperature setting from the Run Module. Not used for 3500.",
+    "DOEv1": "A list of door-open events, separated by semicolon. Door close events are generally paired with door open events.",
+    "ESig2": "Electronic signature record used across 3500 software",
+    "FTab1": "Feature table. Can be created by Nibbler for Clear Range.",
+    "FVoc1": "Feature table vocabulary. Can be created by Nibbler for Clear Range.",
+    "Feat1": "Features. Can be created by Nibbler for Clear Range.",
+    "HCFG1": "The Instrument Class. All upper case, no spaces. Initial valid value: CE",
+    "HCFG2": "The Instrument Family. All upper case, no spaces. Valid values: 31XX or 37XX for UDC, 35XX (for 3500)",
+    "HCFG3": "The official instrument name. Mixed case, minus any special formatting. Initial valid values: 3130, 3130xl, 3730, 3730xl, 3500, 3500xl.",
+    "HCFG4": "Instrument parameters. Contains key-value pairs of instrument configuration information, separated by semicolons. "
+             "Four parameters are included initially: UnitID=, CPUBoard=, "
+             "ArraySize=<# of capillaries>, SerialNumber=.",
+    "InjN1": "Injection name",
+    "LAST1": "Parameter settings information",
+    "NOIS1": "The estimate of rms baseline noise (S/N ratio) for each dye for a successfully analyzed sample. "
+             "Corresponds in order to the raw data in tags DATA 1-4. KB basecaller only.",
+    "P1AM1": "Amplitude of primary peak, which is not necessarily equal to corresponding signal strength at that position",
+    "P1RL1": "Deviation of primary peak position from (PLoc,2), times 100, rounded to integer",
+    "P1WD1": "Full-width Half-max of primary peak, times 100, rounded to integer. "
+             "Corresponding signal intensity is not necessarily equal to one half of primary peak amplitude",
+    "P2AM1": "Amplitude of secondary peak, which is not necessarily equal to corresponding signal strength at that position",
+    "P2BA1": "Base of secondary peak",
+    "P2RL1": "Deviation of secondary peak position from (PLoc,2), times 100, rounded to integer",
+    "PBAS1": "Array of sequence characters edited by user",
+    "PBAS2": "Array of sequence characters as called by Basecaller",
+    "PCON1": "Array of quality Values (0-255) as edited by user",
+    "PCON2": "Array of quality values (0-255) as called by Basecaller",
+    "PDMF2": "Mobility file name chosen in most recent analysis (identical to PDMF1)",
+    "PLOC1": "Array of peak locations edited by user",
+    "PLOC2": "Array of peak locations as called by Basecaller",
+    "PRJT1": "SeqScape 2.0 project template name",
+    "PROJ4": "SeqScape 2.0 project name",
+    "PSZE1": "Plate size. The number of sample positions in the container. Current allowed values: 96, 384.",
+    "PTYP1": "Plate type. Current allowed values: 96-Well, 384-Well.",
+    "PuSc1": "Median pupscore",
+    "QV201": "QV20+ value",
+    "QV202": 'One of "Pass", "Fail", or "Check"',
+    "QcPa1": "QC parameters",
+    "QcRn1": "Trimming and QC code",
+    "QcRs1": "QC warnings, a concatenated comma separated string",
+    "QcRs2": "QC errors, a concatenated comma separated string",
+    "RGOw1": "The name entered as the Owner of a Results Group, in the Results Group Editor. Implemented as the user name from the results group.",
+    "RInj1": "Reinjection number. The reinjection number that this sample belongs to. Not present if there was no reinjection.",
+    "RNmF1": "Raman normalization factor",
+    "RevC1": "for whether the sequence has been complemented",
+    "RunN1": "Run name (which, for 3500, is different from injection name)",
+    "S/N%1": "Signal strength for each dye",
+    "SMID1": "Polymer first installed date",
+    "SMRn1": "Number of runs (injections) processed with the current polymer (runs allowed - runs remaining)",
+    "SPAC1": "Average peak spacing used in last analysis",
+    "SPAC2": "Basecaller name - corresponds to name of bcp file.",
+    "SPAC3": "Average peak spacing last calculated by the Basecaller.",
+    "SPEC1": "Sequencing Analysis Specimen Name",
+    "SVER2": "Basecaller version number",
+    "SVER4": "Sample File Format Version String",
+    "ScPa1": "The parameter string of size caller",
+    "ScSt1": "Raw data start point. Set to 0 for 3500 data collection.",
+    "SpeN1": "Active spectral calibration name",
+    "TrPa1": "Timming parameters",
+    "TrSc1": "Trace score.",
+    "TrSc2": 'One of "Pass", "Fail", or "Check"',
+    "phAR1": "Trace peak aria ratio",
+    "phCH1": 'Chemistry type ("term", "prim", "unknown"), based on DYE_1 information',
+    "phDY1": 'Dye ("big", "d-rhod", "unknown"), based on mob file information',
+    "phQL1": "Maximum Quality Value",
+    "phTR1": "Set Trim region",
+    "phTR2": "Trim probability",
+}
+
+_INSTRUMENT_SPECIFIC_TAGS["abi_3730/3730xl"] = {
+    "BufT1": "Buffer tray heater temperature (degrees C)",
+}
+# fmt: on
+
+# dictionary for data unpacking format
+_BYTEFMT = {
+    1: "b",  # byte
+    2: "s",  # char
+    3: "H",  # word
+    4: "h",  # short
+    5: "i",  # long
+    6: "2i",  # rational, legacy unsupported
+    7: "f",  # float
+    8: "d",  # double
+    10: "h2B",  # date
+    11: "4B",  # time
+    12: "2i2b",  # thumb
+    13: "B",  # bool
+    14: "2h",  # point, legacy unsupported
+    15: "4h",  # rect, legacy unsupported
+    16: "2i",  # vPoint, legacy unsupported
+    17: "4i",  # vRect, legacy unsupported
+    18: "s",  # pString
+    19: "s",  # cString
+    20: "2i",  # tag, legacy unsupported
+}
+# header data structure (excluding 4 byte ABIF marker)
+_HEADFMT = ">H4sI2H3I"
+# directory data structure
+_DIRFMT = ">4sI2H4I"
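+# For reference, each 28-byte directory entry unpacked with _DIRFMT is the
+# 8-tuple (tag name, tag number, element type code, element size, element
+# count, data size, data offset, data handle); _abi_parse_header appends the
+# entry's own file offset as a ninth item.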
+
+__global_tag_listing = []
+for tag in _INSTRUMENT_SPECIFIC_TAGS.values():
+    __global_tag_listing += tag.keys()
+
+
+def _get_string_tag(opt_bytes_value, default=None):
+    """Return the string value of the given an optional raw bytes tag value.
+
+    If the bytes value is None, return the given default value.
+
+    """
+    if opt_bytes_value is None:
+        return default
+    try:
+        return opt_bytes_value.decode()
+    except UnicodeDecodeError:
+        return opt_bytes_value.decode(encoding=sys.getdefaultencoding())
+
+
+class AbiIterator(SequenceIterator):
+    """Parser for Abi files."""
+
+    def __init__(self, source, trim=False):
+        """Return an iterator for the Abi file format."""
+        self.trim = trim
+        super().__init__(source, mode="b", fmt="ABI")
+
+    def parse(self, handle):
+        """Start parsing the file, and return a SeqRecord generator."""
+        # check if input file is a valid Abi file
+        marker = handle.read(4)
+        if not marker:
+            # handle empty file gracefully
+            raise ValueError("Empty file.")
+
+        if marker != b"ABIF":
+            raise OSError("File should start ABIF, not %r" % marker)
+        records = self.iterate(handle)
+        return records
+
+    def iterate(self, handle):
+        """Parse the file and generate SeqRecord objects."""
+        # dirty hack for handling time information
+        times = {"RUND1": "", "RUND2": "", "RUNT1": "", "RUNT2": ""}
+
+        # initialize annotations
+        annot = dict(zip(_EXTRACT.values(), [None] * len(_EXTRACT)))
+
+        # parse header and extract data from directories
+        header = struct.unpack(_HEADFMT, handle.read(struct.calcsize(_HEADFMT)))
+
+        # Set default sample ID value, which we expect to be present in most
+        # cases in the SMPL1 tag, but may be missing.
+        sample_id = ""
+
+        raw = {}
+        seq = qual = None
+        for tag_name, tag_number, tag_data in _abi_parse_header(header, handle):
+            key = tag_name + str(tag_number)
+
+            raw[key] = tag_data
+
+            # PBAS2 is base-called sequence, only available in 3530
+            if key == "PBAS2":
+                seq = tag_data.decode()
+            # PCON2 is quality values of base-called sequence
+            elif key == "PCON2":
+                qual = [ord(val) for val in tag_data.decode()]
+            # SMPL1 is the sample id entered before the sequencing run; it
+            # must be a string.
+            elif key == "SMPL1":
+                sample_id = _get_string_tag(tag_data)
+            elif key in times:
+                times[key] = tag_data
+            else:
+                if key in _EXTRACT:
+                    annot[_EXTRACT[key]] = tag_data
+
+        # set time annotations
+        annot["run_start"] = "%s %s" % (times["RUND1"], times["RUNT1"])
+        annot["run_finish"] = "%s %s" % (times["RUND2"], times["RUNT2"])
+
+        # raw data (for advanced end users benefit)
+        annot["abif_raw"] = raw
+
+        # fsa check
+        is_fsa_file = all(tn not in raw for tn in ("PBAS1", "PBAS2"))
+
+        if is_fsa_file:
+            try:
+                file_name = basename(handle.name).replace(".fsa", "")
+            except AttributeError:
+                file_name = ""
+
+            sample_id = _get_string_tag(raw.get("LIMS1"), sample_id)
+            description = _get_string_tag(raw.get("CTID1"), "")
+            record = SeqRecord(
+                Seq(""),
+                id=sample_id,
+                name=file_name,
+                description=description,
+                annotations=annot,
+            )
+
+        else:
+            # use the file name as SeqRecord.name if available
+            try:
+                file_name = basename(handle.name).replace(".ab1", "")
+            except AttributeError:
+                file_name = ""
+            record = SeqRecord(
+                Seq(seq),
+                id=sample_id,
+                name=file_name,
+                description="",
+                annotations=annot,
+            )
+        if qual:
+            # Expect this to be missing for FSA files.
+            record.letter_annotations["phred_quality"] = qual
+        elif not is_fsa_file and not qual and self.trim:
+            raise ValueError(
+                "The 'abi-trim' format can not be used for files without"
+                " quality values."
+            )
+
+        if self.trim and not is_fsa_file:
+            record = _abi_trim(record)
+
+        record.annotations["molecule_type"] = "DNA"
+        yield record
+
+
+def _AbiTrimIterator(handle):
+    """Return an iterator for the Abi file format that yields trimmed SeqRecord objects (PRIVATE)."""
+    return AbiIterator(handle, trim=True)
+
+
+def _abi_parse_header(header, handle):
+    """Return directory contents (PRIVATE)."""
+        # header structure (after ABIF marker), as unpacked with _HEADFMT:
+        # file version, tag name, tag number, element type code,
+        # element size, number of elements, data size, data offset
+    head_elem_size = header[4]
+    head_elem_num = header[5]
+    head_offset = header[7]
+    index = 0
+
+    while index < head_elem_num:
+        start = head_offset + index * head_elem_size
+        # add directory offset to tuple
+        # to handle directories with data size <= 4 bytes
+        handle.seek(start)
+        dir_entry = struct.unpack(_DIRFMT, handle.read(struct.calcsize(_DIRFMT))) + (
+            start,
+        )
+        index += 1
+
+        tag_name = dir_entry[0].decode()
+        tag_number = dir_entry[1]
+        elem_code = dir_entry[2]
+        elem_num = dir_entry[4]
+        data_size = dir_entry[5]
+        data_offset = dir_entry[6]
+        tag_offset = dir_entry[8]
+        # if data size <= 4 bytes, data is stored inside tag
+        # so offset needs to be changed
+        if data_size <= 4:
+            data_offset = tag_offset + 20
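+            # (the inline value is stored in the 4-byte data offset field
+            # itself, which starts 20 bytes into the 28-byte entry)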
+        handle.seek(data_offset)
+        data = handle.read(data_size)
+        yield tag_name, tag_number, _parse_tag_data(elem_code, elem_num, data)
+
+
+def _abi_trim(seq_record):
+    """Trims the sequence using Richard Mott's modified trimming algorithm (PRIVATE).
+
+    Arguments:
+        - seq_record - SeqRecord object to be trimmed.
+
+    Trimmed bases are determined from their segment score, which is a
+    cumulative sum of each base's score. Base scores are calculated from
+    their quality values.
+
+    More about the trimming algorithm:
+    http://www.phrap.org/phredphrap/phred.html
+    http://resources.qiagenbioinformatics.com/manuals/clcgenomicsworkbench/650/Quality_trimming.html
+    """
+    start = False  # flag for starting position of trimmed sequence
+    segment = 20  # minimum sequence length
+    trim_start = 0  # init start index
+    cutoff = 0.05  # default cutoff value for calculating base score
+
+    if len(seq_record) <= segment:
+        return seq_record
+    else:
+        # calculate base score
+        score_list = [
+            cutoff - (10 ** (qual / -10.0))
+            for qual in seq_record.letter_annotations["phred_quality"]
+        ]
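+        # e.g. a base of phred quality 20 has error probability
+        # 10 ** (20 / -10.0) = 0.01, scoring 0.05 - 0.01 = 0.04; bases
+        # below quality ~13 (error rate > 0.05) score negative.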
+
+        # calculate cumulative score
+        # if cumulative value < 0, set it to 0
+        # first value is set to 0, because of the assumption that
+        # the first base will always be trimmed out
+        cummul_score = [0]
+        for i in range(1, len(score_list)):
+            score = cummul_score[-1] + score_list[i]
+            if score < 0:
+                cummul_score.append(0)
+            else:
+                cummul_score.append(score)
+                if not start:
+                    # trim_start = value when cumulative score is first > 0
+                    trim_start = i
+                    start = True
+
+        # trim_finish = index of highest cumulative score,
+        # marking the end of sequence segment with highest cumulative score
+        trim_finish = cummul_score.index(max(cummul_score))
+
+        return seq_record[trim_start:trim_finish]
+
+
+def _parse_tag_data(elem_code, elem_num, raw_data):
+    """Return single data value (PRIVATE).
+
+    Arguments:
+     - elem_code - What kind of data
+     - elem_num - How many data points
+     - raw_data - the raw bytes from which the tag value is unpacked
+
+    """
+    if elem_code in _BYTEFMT:
+        # because '>1s' unpack differently from '>s'
+        if elem_num == 1:
+            num = ""
+        else:
+            num = str(elem_num)
+        fmt = ">" + num + _BYTEFMT[elem_code]
+
+        assert len(raw_data) == struct.calcsize(fmt)
+        data = struct.unpack(fmt, raw_data)
+
+        # no need to use tuple if len(data) == 1
+        # also if data is date / time
+        if elem_code not in [10, 11] and len(data) == 1:
+            data = data[0]
+
+        # account for different data types
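+        # (2 = char array returned as-is, 10 = date, 11 = time, 13 = bool,
+        #  18 = pString with its leading length byte stripped,
+        #  19 = cString with its trailing null byte stripped)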
+        if elem_code == 2:
+            return data
+        elif elem_code == 10:
+            return str(datetime.date(*data))
+        elif elem_code == 11:
+            return str(datetime.time(*data[:3]))
+        elif elem_code == 13:
+            return bool(data)
+        elif elem_code == 18:
+            return data[1:]
+        elif elem_code == 19:
+            return data[:-1]
+        else:
+            return data
+    else:
+        return None
+
+
+if __name__ == "__main__":
+    pass
diff --git a/code/lib/Bio/SeqIO/AceIO.py b/code/lib/Bio/SeqIO/AceIO.py
new file mode 100644
index 0000000..85bb0f5
--- /dev/null
+++ b/code/lib/Bio/SeqIO/AceIO.py
@@ -0,0 +1,101 @@
+# Copyright 2008-2015 by Peter Cock.  All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Bio.SeqIO support for the "ace" file format.
+
+You are expected to use this module via the Bio.SeqIO functions.
+See also the Bio.Sequencing.Ace module which offers more than just accessing
+the contig consensus sequences in an ACE file as SeqRecord objects.
+"""
+from Bio.Seq import Seq
+from Bio.SeqRecord import SeqRecord
+from Bio.Sequencing import Ace
+
+
+def AceIterator(source):
+    """Return SeqRecord objects from an ACE file.
+
+    This uses the Bio.Sequencing.Ace module to do the hard work.  Note that
+    by iterating over the file in a single pass, we are forced to ignore any
+    WA, CT, RT or WR footer tags.
+
+    Ace files include the base quality for each position, which are taken
+    to be PHRED style scores. Just as if you had read in a FASTQ or QUAL file
+    using PHRED scores using Bio.SeqIO, these are stored in the SeqRecord's
+    letter_annotations dictionary under the "phred_quality" key.
+
+    >>> from Bio import SeqIO
+    >>> with open("Ace/consed_sample.ace") as handle:
+    ...     for record in SeqIO.parse(handle, "ace"):
+    ...         print("%s %s... %i" % (record.id, record.seq[:10], len(record)))
+    ...         print(max(record.letter_annotations["phred_quality"]))
+    Contig1 agccccgggc... 1475
+    90
+
+    However, ACE files do not include a base quality for any gaps in the
+    consensus sequence, and these are represented in Biopython with a quality
+    of zero. Using zero is perhaps misleading as there may be very strong
+    evidence to support the gap in the consensus. Previous versions of
+    Biopython therefore used None instead, but this complicated usage, and
+    prevented output of the gapped sequence as FASTQ format.
+
+    >>> from Bio import SeqIO
+    >>> with open("Ace/contig1.ace") as handle:
+    ...     for record in SeqIO.parse(handle, "ace"):
+    ...         print("%s ...%s..." % (record.id, record.seq[85:95]))
+    ...         print(record.letter_annotations["phred_quality"][85:95])
+    ...         print(max(record.letter_annotations["phred_quality"]))
+    Contig1 ...AGAGG-ATGC...
+    [57, 57, 54, 57, 57, 0, 57, 72, 72, 72]
+    90
+    Contig2 ...GAATTACTAT...
+    [68, 68, 68, 68, 68, 68, 68, 68, 68, 68]
+    90
+
+    """
+    for ace_contig in Ace.parse(source):
+        # Convert the ACE contig record into a SeqRecord...
+        consensus_seq_str = ace_contig.sequence
+        if "*" in consensus_seq_str:
+            # For consistency with most other file formats, map
+            # any * gaps into - gaps.
+            assert "-" not in consensus_seq_str
+            consensus_seq = Seq(consensus_seq_str.replace("*", "-"))
+        else:
+            consensus_seq = Seq(consensus_seq_str)
+
+        # TODO? - Base segments (BS lines) which indicates which read
+        # phrap has chosen to be the consensus at a particular position.
+        # Perhaps as SeqFeature objects?
+
+        # TODO - Supporting reads (RD lines, plus perhaps QA and DS lines)
+        # Perhaps as SeqFeature objects?
+
+        seq_record = SeqRecord(consensus_seq, id=ace_contig.name, name=ace_contig.name)
+
+        # Consensus base quality (BQ lines).  Note that any gaps (originally
+        # as * characters) in the consensus do not get a quality entry, so
+        # we assign a quality of zero (older versions used None here, but
+        # that complicated downstream use - see the docstring above).
+        quals = []
+        i = 0
+        for base in consensus_seq:
+            if base == "-":
+                quals.append(0)
+            else:
+                quals.append(ace_contig.quality[i])
+                i += 1
+        assert i == len(ace_contig.quality)
+        seq_record.letter_annotations["phred_quality"] = quals
+
+        yield seq_record
+    # All done
+
+
+if __name__ == "__main__":
+    from Bio._utils import run_doctest
+
+    run_doctest()
diff --git a/code/lib/Bio/SeqIO/FastaIO.py b/code/lib/Bio/SeqIO/FastaIO.py
new file mode 100644
index 0000000..73abcd4
--- /dev/null
+++ b/code/lib/Bio/SeqIO/FastaIO.py
@@ -0,0 +1,426 @@
+# Copyright 2006-2017,2020 by Peter Cock.  All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+#
+# This module is for reading and writing FASTA format files as SeqRecord
+# objects.  The code is partly inspired by earlier Biopython modules,
+# Bio.Fasta.* and the now removed module Bio.SeqIO.FASTA
+"""Bio.SeqIO support for the "fasta" (aka FastA or Pearson) file format.
+
+You are expected to use this module via the Bio.SeqIO functions.
+"""
+from Bio.Seq import Seq
+from Bio.SeqRecord import SeqRecord
+
+from .Interfaces import _clean
+from .Interfaces import _get_seq_string
+from .Interfaces import SequenceIterator
+from .Interfaces import SequenceWriter
+
+
+def SimpleFastaParser(handle):
+    """Iterate over Fasta records as string tuples.
+
+    Arguments:
+     - handle - input stream opened in text mode
+
+    For each record a tuple of two strings is returned, the FASTA title
+    line (without the leading '>' character), and the sequence (with any
+    whitespace removed). The title line is not divided up into an
+    identifier (the first word) and comment or description.
+
+    >>> with open("Fasta/dups.fasta") as handle:
+    ...     for values in SimpleFastaParser(handle):
+    ...         print(values)
+    ...
+    ('alpha', 'ACGTA')
+    ('beta', 'CGTC')
+    ('gamma', 'CCGCC')
+    ('alpha (again - this is a duplicate entry to test the indexing code)', 'ACGTA')
+    ('delta', 'CGCGC')
+
+    """
+    # Skip any text before the first record (e.g. blank lines, comments)
+    for line in handle:
+        if line[0] == ">":
+            title = line[1:].rstrip()
+            break
+    else:
+        # no break encountered - probably an empty file
+        return
+
+    # Main logic
+    # Note, remove trailing whitespace, and any internal spaces
+    # (and any embedded \r which are possible in mangled files
+    # when not opened in universal read lines mode)
+    lines = []
+    for line in handle:
+        if line[0] == ">":
+            yield title, "".join(lines).replace(" ", "").replace("\r", "")
+            lines = []
+            title = line[1:].rstrip()
+            continue
+        lines.append(line.rstrip())
+
+    yield title, "".join(lines).replace(" ", "").replace("\r", "")
+
+
+def FastaTwoLineParser(handle):
+    """Iterate over no-wrapping Fasta records as string tuples.
+
+    Arguments:
+     - handle - input stream opened in text mode
+
+    Functionally the same as SimpleFastaParser but with a strict
+    interpretation of the FASTA format as exactly two lines per
+    record, the greater-than-sign identifier with description,
+    and the sequence with no line wrapping.
+
+    Any line wrapping will raise an exception, as will excess blank
+    lines (other than the special case of a zero-length sequence
+    as the second line of a record).
+
+    Examples
+    --------
+    This file uses two lines per FASTA record:
+
+    >>> with open("Fasta/aster_no_wrap.pro") as handle:
+    ...     for title, seq in FastaTwoLineParser(handle):
+    ...         print("%s = %s..." % (title, seq[:3]))
+    ...
+    gi|3298468|dbj|BAA31520.1| SAMIPF = GGH...
+
+    This equivalent file uses line wrapping:
+
+    >>> with open("Fasta/aster.pro") as handle:
+    ...     for title, seq in FastaTwoLineParser(handle):
+    ...         print("%s = %s..." % (title, seq[:3]))
+    ...
+    Traceback (most recent call last):
+       ...
+    ValueError: Expected FASTA record starting with '>' character. Perhaps this file is using FASTA line wrapping? Got: 'MTFGLVYTVYATAIDPKKGSLGTIAPIAIGFIVGANI'
+
+    """
+    idx = -1  # for empty file
+    for idx, line in enumerate(handle):
+        if idx % 2 == 0:  # title line
+            if line[0] != ">":
+                raise ValueError(
+                    "Expected FASTA record starting with '>' character. "
+                    "Perhaps this file is using FASTA line wrapping? "
+                    f"Got: '{line}'"
+                )
+            title = line[1:].rstrip()
+        else:  # sequence line
+            if line[0] == ">":
+                raise ValueError(
+                    "Two '>' FASTA lines in a row. Missing sequence line "
+                    "if this is strict two-line-per-record FASTA format. "
+                    f"Have '>{title}' and '{line}'"
+                )
+            yield title, line.strip()
+
+    if idx == -1:
+        pass  # empty file
+    elif idx % 2 == 0:  # on a title line
+        raise ValueError(
+            "Missing sequence line at end of file if this is strict "
+            f"two-line-per-record FASTA format. Have title line '{line}'"
+        )
+    else:
+        assert line[0] != ">", "line[0] == '>' ; this should be impossible!"
+
+
+class FastaIterator(SequenceIterator):
+    """Parser for Fasta files."""
+
+    def __init__(self, source, alphabet=None, title2ids=None):
+        """Iterate over Fasta records as SeqRecord objects.
+
+        Arguments:
+         - source - input stream opened in text mode, or a path to a file
+         - alphabet - optional alphabet, not used. Leave as None.
+         - title2ids - A function that, when given the title of the FASTA
+           file (without the beginning >), will return the id, name and
+           description (in that order) for the record as a tuple of strings.
+           If this is not given, then the entire title line will be used
+           as the description, and the first word as the id and name.
+
+        By default this will act like calling Bio.SeqIO.parse(handle, "fasta")
+        with no custom handling of the title lines:
+
+        >>> with open("Fasta/dups.fasta") as handle:
+        ...     for record in FastaIterator(handle):
+        ...         print(record.id)
+        ...
+        alpha
+        beta
+        gamma
+        alpha
+        delta
+
+        However, you can supply a title2ids function to alter this:
+
+        >>> def take_upper(title):
+        ...     return title.split(None, 1)[0].upper(), "", title
+        >>> with open("Fasta/dups.fasta") as handle:
+        ...     for record in FastaIterator(handle, title2ids=take_upper):
+        ...         print(record.id)
+        ...
+        ALPHA
+        BETA
+        GAMMA
+        ALPHA
+        DELTA
+
+        """
+        if alphabet is not None:
+            raise ValueError("The alphabet argument is no longer supported")
+        self.title2ids = title2ids
+        super().__init__(source, mode="t", fmt="Fasta")
+
+    def parse(self, handle):
+        """Start parsing the file, and return a SeqRecord generator."""
+        records = self.iterate(handle)
+        return records
+
+    def iterate(self, handle):
+        """Parse the file and generate SeqRecord objects."""
+        title2ids = self.title2ids
+        if title2ids:
+            for title, sequence in SimpleFastaParser(handle):
+                id, name, descr = title2ids(title)
+                yield SeqRecord(Seq(sequence), id=id, name=name, description=descr)
+        else:
+            for title, sequence in SimpleFastaParser(handle):
+                try:
+                    first_word = title.split(None, 1)[0]
+                except IndexError:
+                    assert not title, repr(title)
+                    # Should we use SeqRecord default for no ID?
+                    first_word = ""
+                yield SeqRecord(
+                    Seq(sequence), id=first_word, name=first_word, description=title,
+                )
+
+
+class FastaTwoLineIterator(SequenceIterator):
+    """Parser for Fasta files with exactly two lines per record."""
+
+    def __init__(self, source):
+        """Iterate over two-line Fasta records (as SeqRecord objects).
+
+        Arguments:
+         - source - input stream opened in text mode, or a path to a file
+
+        This uses a strict interpretation of the FASTA as requiring
+        exactly two lines per record (no line wrapping).
+
+        Only the default title to ID/name/description parsing offered
+        by the relaxed FASTA parser is offered.
+        """
+        super().__init__(source, mode="t", fmt="FASTA")
+
+    def parse(self, handle):
+        """Start parsing the file, and return a SeqRecord generator."""
+        records = self.iterate(handle)
+        return records
+
+    def iterate(self, handle):
+        """Parse the file and generate SeqRecord objects."""
+        for title, sequence in FastaTwoLineParser(handle):
+            try:
+                first_word = title.split(None, 1)[0]
+            except IndexError:
+                assert not title, repr(title)
+                # Should we use SeqRecord default for no ID?
+                first_word = ""
+            yield SeqRecord(
+                Seq(sequence), id=first_word, name=first_word, description=title,
+            )
+
+
+class FastaWriter(SequenceWriter):
+    """Class to write Fasta format files (OBSOLETE).
+
+    Please use the ``as_fasta`` function instead, or the top level
+    ``Bio.SeqIO.write()`` function using ``format="fasta"``.
+    """
+
+    def __init__(self, target, wrap=60, record2title=None):
+        """Create a Fasta writer (OBSOLETE).
+
+        Arguments:
+         - target - Output stream opened in text mode, or a path to a file.
+         - wrap -   Optional line length used to wrap sequence lines.
+           Defaults to wrapping the sequence at 60 characters.
+           Use zero (or None) for no wrapping, giving a single
+           long line for the sequence.
+         - record2title - Optional function to return the text to be
+           used for the title line of each record.  By default
+           a combination of the record.id and record.description
+           is used.  If the record.description starts with the
+           record.id, then just the record.description is used.
+
+        You can either use::
+
+            handle = open(filename, "w")
+            writer = FastaWriter(handle)
+            writer.write_file(myRecords)
+            handle.close()
+
+        Or, follow the sequential file writer system, for example::
+
+            handle = open(filename, "w")
+            writer = FastaWriter(handle)
+            writer.write_header() # does nothing for Fasta files
+            ...
+            Multiple writer.write_record() and/or writer.write_records() calls
+            ...
+            writer.write_footer() # does nothing for Fasta files
+            handle.close()
+
+        """
+        super().__init__(target)
+        if wrap:
+            if wrap < 1:
+                raise ValueError("wrap should be None, zero, or a positive integer")
+        self.wrap = wrap
+        self.record2title = record2title
+
+    def write_record(self, record):
+        """Write a single Fasta record to the file."""
+        if self.record2title:
+            title = self.clean(self.record2title(record))
+        else:
+            id = self.clean(record.id)
+            description = self.clean(record.description)
+            if description and description.split(None, 1)[0] == id:
+                # The description includes the id at the start
+                title = description
+            elif description:
+                title = "%s %s" % (id, description)
+            else:
+                title = id
+
+        assert "\n" not in title
+        assert "\r" not in title
+        self.handle.write(">%s\n" % title)
+
+        data = _get_seq_string(record)  # Catches sequence being None
+
+        assert "\n" not in data
+        assert "\r" not in data
+
+        if self.wrap:
+            for i in range(0, len(data), self.wrap):
+                self.handle.write(data[i : i + self.wrap] + "\n")
+        else:
+            self.handle.write(data + "\n")
+
+
+class FastaTwoLineWriter(FastaWriter):
+    """Class to write 2-line per record Fasta format files (OBSOLETE).
+
+    This means we write the sequence information without line
+    wrapping, and will always write a blank line for an empty
+    sequence.
+
+    Please use the ``as_fasta_2line`` function instead, or the top level
+    ``Bio.SeqIO.write()`` function using ``format="fasta-2line"``.
+    """
+
+    def __init__(self, handle, record2title=None):
+        """Create a 2-line per record Fasta writer (OBSOLETE).
+
+        Arguments:
+         - handle - Handle to an output file, e.g. as returned
+           by open(filename, "w")
+         - record2title - Optional function to return the text to be
+           used for the title line of each record.  By default
+           a combination of the record.id and record.description
+           is used.  If the record.description starts with the
+           record.id, then just the record.description is used.
+
+        You can either use::
+
+            handle = open(filename, "w")
+            writer = FastaTwoLineWriter(handle)
+            writer.write_file(myRecords)
+            handle.close()
+
+        Or, follow the sequential file writer system, for example::
+
+            handle = open(filename, "w")
+            writer = FastaTwoLineWriter(handle)
+            writer.write_header() # does nothing for Fasta files
+            ...
+            Multiple writer.write_record() and/or writer.write_records() calls
+            ...
+            writer.write_footer() # does nothing for Fasta files
+            handle.close()
+
+        """
+        super().__init__(handle, wrap=None, record2title=record2title)
+
+
+def as_fasta(record):
+    """Turn a SeqRecord into a FASTA formatted string.
+
+    This is used internally by the SeqRecord's .format("fasta")
+    method and by the SeqIO.write(..., ..., "fasta") function.
+    """
+    id = _clean(record.id)
+    description = _clean(record.description)
+    if description and description.split(None, 1)[0] == id:
+        # The description includes the id at the start
+        title = description
+    elif description:
+        title = "%s %s" % (id, description)
+    else:
+        title = id
+    assert "\n" not in title
+    assert "\r" not in title
+    lines = [">%s\n" % title]
+
+    data = _get_seq_string(record)  # Catches sequence being None
+    assert "\n" not in data
+    assert "\r" not in data
+    for i in range(0, len(data), 60):
+        lines.append(data[i : i + 60] + "\n")
+
+    return "".join(lines)
+
+
+def as_fasta_2line(record):
+    """Turn a SeqRecord into a two-line FASTA formatted string.
+
+    This is used internally by the SeqRecord's .format("fasta-2line")
+    method and by the SeqIO.write(..., ..., "fasta-2line") function.
+    """
+    id = _clean(record.id)
+    description = _clean(record.description)
+    if description and description.split(None, 1)[0] == id:
+        # The description includes the id at the start
+        title = description
+    elif description:
+        title = "%s %s" % (id, description)
+    else:
+        title = id
+    assert "\n" not in title
+    assert "\r" not in title
+
+    data = _get_seq_string(record)  # Catches sequence being None
+    assert "\n" not in data
+    assert "\r" not in data
+
+    return ">%s\n%s\n" % (title, data)
+
+
+if __name__ == "__main__":
+    from Bio._utils import run_doctest
+
+    run_doctest(verbose=0)
diff --git a/code/lib/Bio/SeqIO/GckIO.py b/code/lib/Bio/SeqIO/GckIO.py
new file mode 100644
index 0000000..2fa48eb
--- /dev/null
+++ b/code/lib/Bio/SeqIO/GckIO.py
@@ -0,0 +1,230 @@
+# Copyright 2019 Damien Goutte-Gattat.  All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Bio.SeqIO support for the "gck" file format.
+
+The GCK binary format is generated by the Gene Construction Kit software
+from Textco BioSoftware, Inc.
+"""
+from struct import unpack
+
+from Bio.Seq import Seq
+from Bio.SeqFeature import FeatureLocation
+from Bio.SeqFeature import SeqFeature
+from Bio.SeqRecord import SeqRecord
+
+from .Interfaces import SequenceIterator
+
+
+def _read(handle, length):
+    """Read the specified number of bytes from the given handle."""
+    data = handle.read(length)
+    if len(data) < length:
+        raise ValueError(f"Cannot read {length} bytes from handle")
+    return data
+
+
+def _read_packet(handle):
+    """Read a length-prefixed packet.
+
+    Parts of a GCK file are made of "packets", each comprising 4 bytes
+    giving the packet's size, followed by the packet's data.
+
+    There is no type tag. The type of a packet, and thus the type of data
+    it contains, is solely indicated by the position of the packet within
+    the GCK file.
+    """
+    length = _read(handle, 4)
+    length = unpack(">I", length)[0]
+    data = _read(handle, length)
+    return (data, length)
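+
+# Illustrative sketch (not part of the original module), assuming an
+# in-memory handle: the length prefix is a big-endian 32-bit integer, so a
+# four-byte payload is stored as b"\x00\x00\x00\x04" followed by the data:
+#
+#     import io
+#     data, length = _read_packet(io.BytesIO(b"\x00\x00\x00\x04ACGT"))
+#     assert (data, length) == (b"ACGT", 4)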
+
+
+def _read_pstring(handle):
+    """Read a Pascal string.
+
+    A Pascal string is one byte for length followed by the actual string.
+    """
+    length = _read(handle, 1)
+    length = unpack(">B", length)[0]
+    data = _read(handle, length).decode("ASCII")
+    return data
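+
+# Illustrative sketch: a Pascal string carries its length in one leading
+# byte, so b"\x05hello" decodes to "hello":
+#
+#     import io
+#     assert _read_pstring(io.BytesIO(b"\x05hello")) == "hello"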
+
+
+def _read_p4string(handle):
+    """Read a 32-bit Pascal string.
+
+    Similar to a Pascal string but length is encoded on 4 bytes.
+    """
+    length = _read(handle, 4)
+    length = unpack(">I", length)[0]
+    data = _read(handle, length).decode("ASCII")
+    return data
+
+
+def _parse(handle):
+    # Skip file header
+    # GCK files start with a 24-byte header. Bytes 4 and 8 seem to
+    # always be 12, maybe this could act as a magic cookie. Bytes
+    # 17-20 and 21-24 contain variable values of unknown meaning.
+    # check if file is empty
+    data = handle.read(24)
+    if not data:
+        raise ValueError("Empty file.")
+    if len(data) < 24:
+        raise ValueError("Improper header, cannot read 24 bytes from handle")
+    # Read the actual sequence data
+    packet, length = _read_packet(handle)
+    # The body of the sequence packet starts with a 32-bit integer
+    # representing the length of the sequence.
+    seq_length = unpack(">I", packet[:4])[0]
+    # This length should not be larger than the length of the
+    # sequence packet.
+    if seq_length > length - 4:
+        raise ValueError("Conflicting sequence length values")
+    sequence = packet[4:].decode("ASCII")
+    record = SeqRecord(Seq(sequence))
+
+    # Skip unknown packet
+    _read_packet(handle)
+
+    # Read features packet
+    packet, length = _read_packet(handle)
+    (seq_length, num_features) = unpack(">IH", packet[:6])
+    # Check that length in the features packet matches the actual
+    # length of the sequence
+    if seq_length != len(sequence):
+        raise ValueError("Conflicting sequence length values")
+    # Each feature is stored in a 92-byte structure.
+    if length - 6 != num_features * 92:
+        raise ValueError("Features packet size inconsistent with number of features")
+    for i in range(0, num_features):
+        offset = 6 + i * 92
+        feature_data = packet[offset : offset + 92]
+
+        # There's probably more stuff to unpack in that structure,
+        # but those values are the only ones I understand.
+        (start, end, type, strand, has_name, has_comment, version) = unpack(
+            ">II6xH14xB17xII35xB", feature_data
+        )
+
+        if strand == 1:  # Reverse strand
+            strand = -1
+        else:
+            # Other possible values are 0 (no strand specified),
+            # 2 (forward strand), and 3 (both strands). All are
+            # treated as a forward strand.
+            strand = 1
+        location = FeatureLocation(start, end, strand=strand)
+
+        # It looks like any value > 0 indicates a CDS...
+        if type > 0:
+            type = "CDS"
+        else:
+            type = "misc_feature"
+
+        # Each feature may have a name and a comment, which are then
+        # stored immediately after the features packet. Names are
+        # stored as Pascal strings (1 length byte followed by the
+        # string itself), comments are stored as "32-bit Pascal strings"
+        # (4 length bytes followed by the string).
+        qualifiers = {}
+        if has_name > 0:
+            name = _read_pstring(handle)
+            qualifiers["label"] = [name]
+        if has_comment > 0:
+            comment = _read_p4string(handle)
+            qualifiers["note"] = [comment]
+
+        # Each feature may exist in several "versions". We keep only
+        # the most recent version.
+        if version > 0:
+            continue
+
+        feature = SeqFeature(location, type=type, qualifiers=qualifiers)
+        record.features.append(feature)
+
+    # Read restriction sites packet
+    # We are not interested in restriction sites, but we must still read
+    # that packet so that we can skip the names and comments for each
+    # site, which are stored after that packet in a similar way as for
+    # the features above.
+    packet, length = _read_packet(handle)
+    (seq_length, num_sites) = unpack(">IH", packet[:6])
+    # Each site is stored in an 88-byte structure
+    if length - 6 != num_sites * 88:
+        raise ValueError("Sites packet size inconsistent with number of sites")
+    for i in range(0, num_sites):
+        offset = 6 + i * 88
+        site_data = packet[offset : offset + 88]
+
+        (start, end, has_name, has_comment) = unpack(">II24xII48x", site_data)
+
+        # Skip names and comments
+        if has_name:
+            _read_pstring(handle)
+        if has_comment:
+            _read_p4string(handle)
+
+    # Skip unknown packet
+    _read_packet(handle)
+
+    # Next in the file are "version packets".
+    # However they are not properly formatted "packets" as they are not
+    # preceded by an integer giving their size. Instead we have a
+    # short integer indicating how many versions there are, and then
+    # as many 260-byte blocks as there are versions.
+    num_versions = _read(handle, 2)
+    num_versions = unpack(">H", num_versions)[0]
+    versions = _read(handle, num_versions * 260)
+    for i in range(0, num_versions):
+        offset = i * 260
+        version_data = versions[offset : offset + 260]
+
+        # Each version may have a comment, which is then stored
+        # after all the "version packets".
+        has_comment = unpack(">I", version_data[-4:])[0]
+        if has_comment > 0:
+            _read_p4string(handle)
+
+    # Skip unknown fixed-size block
+    # Whatever this block contains, it is not preceded by any length
+    # indicator, so I hope its size is indeed constant in all files...
+    _read(handle, 706)
+
+    # Read the construct's name
+    name = _read_pstring(handle)
+    record.name = record.id = name.split(" ")[0]
+    record.description = name
+
+    # Circularity byte
+    # There may be other flags in that block, but their meaning
+    # is unknown to me.
+    flags = _read(handle, 17)
+    circularity = unpack(">16xB", flags)[0]
+    if circularity > 0:
+        record.annotations["topology"] = "circular"
+    else:
+        record.annotations["topology"] = "linear"
+
+    yield record
+
+
+class GckIterator(SequenceIterator):
+    """Parser for GCK files."""
+
+    def __init__(self, source):
+        """Break up a GCK file into SeqRecord objects."""
+        super().__init__(source, mode="b", fmt="GCK")
+
+    def parse(self, handle):
+        """Start parsing the file, and return a SeqRecord generator.
+
+        Note that a GCK file can only contain one sequence, so this
+        iterator will always return a single record.
+        """
+        records = _parse(handle)
+        return records
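+
+# Illustrative usage sketch ("construct.gck" is a hypothetical path): GCK is
+# a binary format, so open it via Bio.SeqIO using the "gck" format name, e.g.
+#
+#     from Bio import SeqIO
+#     record = SeqIO.read("construct.gck", "gck")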
diff --git a/code/lib/Bio/SeqIO/IgIO.py b/code/lib/Bio/SeqIO/IgIO.py
new file mode 100644
index 0000000..6921172
--- /dev/null
+++ b/code/lib/Bio/SeqIO/IgIO.py
@@ -0,0 +1,128 @@
+# Copyright 2008-2015 by Peter Cock.  All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Bio.SeqIO support for the "ig" (IntelliGenetics or MASE) file format.
+
+This module is for reading and writing IntelliGenetics format files as
+SeqRecord objects.  This file format appears to be the same as the MASE
+multiple sequence alignment format.
+
+You are expected to use this module via the Bio.SeqIO functions.
+"""
+from Bio.Seq import Seq
+from Bio.SeqRecord import SeqRecord
+
+from .Interfaces import SequenceIterator
+
+
+class IgIterator(SequenceIterator):
+    """Parser for IntelliGenetics files."""
+
+    def __init__(self, source):
+        """Iterate over IntelliGenetics records (as SeqRecord objects).
+
+        source - file-like object opened in text mode, or a path to a file
+
+        The optional free format file header lines (which start with two
+        semi-colons) are ignored.
+
+        The free format commentary lines at the start of each record (which
+        start with a semi-colon) are recorded as a single string with embedded
+        new line characters in the SeqRecord's annotations dictionary under the
+        key 'comment'.
+
+        Examples
+        --------
+        >>> with open("IntelliGenetics/TAT_mase_nuc.txt") as handle:
+        ...     for record in IgIterator(handle):
+        ...         print("%s length %i" % (record.id, len(record)))
+        ...
+        A_U455 length 303
+        B_HXB2R length 306
+        C_UG268A length 267
+        D_ELI length 309
+        F_BZ163A length 309
+        O_ANT70 length 342
+        O_MVP5180 length 348
+        CPZGAB length 309
+        CPZANT length 309
+        A_ROD length 390
+        B_EHOA length 420
+        D_MM251 length 390
+        STM_STM length 387
+        VER_AGM3 length 354
+        GRI_AGM677 length 264
+        SAB_SAB1C length 219
+        SYK_SYK length 330
+
+        """
+        super().__init__(source, mode="t", fmt="IntelliGenetics")
+
+    def parse(self, handle):
+        """Start parsing the file, and return a SeqRecord generator."""
+        records = self.iterate(handle)
+        return records
+
+    def iterate(self, handle):
+        """Iterate over the records in the IntelliGenetics file."""
+        # Skip any file header text before the first record (;; lines)
+        for line in handle:
+            if not line.startswith(";;"):
+                break
+        else:
+            # Empty file, or header only
+            return
+
+        if line[0] != ";":
+            raise ValueError("Records should start with ';' and not:\n%r" % line)
+        while line:
+            # Now iterate over the records
+
+            # Try and agree with SeqRecord convention from the GenBank parser,
+            # (and followed in the SwissProt parser) which stores the comments
+            # as a long string with newlines under annotations key 'comment'.
+
+            # Note some examples use "; ..." and others ";..."
+            comment_lines = []
+            while line.startswith(";"):
+                # TODO - Extract identifier from lines like "LOCUS\tB_SF2"?
+                comment_lines.append(line[1:].strip())
+                line = next(handle)
+            title = line.rstrip()
+
+            seq_lines = []
+            for line in handle:
+                if line[0] == ";":
+                    break
+                # Remove trailing whitespace, and any internal spaces
+                seq_lines.append(line.rstrip().replace(" ", ""))
+            else:
+                line = None
+            seq_str = "".join(seq_lines)
+            if seq_str.endswith("1"):
+                # Remove the optional terminator (digit one)
+                seq_str = seq_str[:-1]
+            if "1" in seq_str:
+                raise ValueError(
+                    "Potential terminator digit one found within sequence."
+                )
+
+            # Return the record and then continue...
+            yield SeqRecord(
+                Seq(seq_str),
+                id=title,
+                name=title,
+                annotations={"comment": "\n".join(comment_lines)},
+            )
+
+        # We should be at the end of the file now
+        assert not line
+
+
+if __name__ == "__main__":
+    from Bio._utils import run_doctest
+
+    run_doctest(verbose=0)
diff --git a/code/lib/Bio/SeqIO/InsdcIO.py b/code/lib/Bio/SeqIO/InsdcIO.py
new file mode 100644
index 0000000..fd6c079
--- /dev/null
+++ b/code/lib/Bio/SeqIO/InsdcIO.py
@@ -0,0 +1,1511 @@
+# Copyright 2007-2016 by Peter Cock.  All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Bio.SeqIO support for the "genbank" and "embl" file formats.
+
+You are expected to use this module via the Bio.SeqIO functions.
+Note that internally this module calls Bio.GenBank to do the actual
+parsing of GenBank, EMBL and IMGT files.
+
+See Also:
+International Nucleotide Sequence Database Collaboration
+http://www.insdc.org/
+
+GenBank
+http://www.ncbi.nlm.nih.gov/Genbank/
+
+EMBL Nucleotide Sequence Database
+http://www.ebi.ac.uk/embl/
+
+DDBJ (DNA Data Bank of Japan)
+http://www.ddbj.nig.ac.jp/
+
+IMGT (uses a variant of EMBL format with longer feature indents)
+http://imgt.cines.fr/download/LIGM-DB/userman_doc.html
+http://imgt.cines.fr/download/LIGM-DB/ftable_doc.html
+http://www.ebi.ac.uk/imgt/hla/docs/manual.html
+
+"""
+import warnings
+
+from datetime import datetime
+
+from Bio import BiopythonWarning
+from Bio import SeqFeature
+from Bio import SeqIO
+from Bio.GenBank.Scanner import _ImgtScanner
+from Bio.GenBank.Scanner import EmblScanner
+from Bio.GenBank.Scanner import GenBankScanner
+from Bio.Seq import UndefinedSequenceError
+from Bio.Seq import UnknownSeq
+
+from .Interfaces import _get_seq_string
+from .Interfaces import SequenceIterator
+from .Interfaces import SequenceWriter
+
+
+# NOTE
+# ====
+# The "brains" for parsing GenBank, EMBL and IMGT files (and any
+# other flat file variants from the INSDC in future) is in
+# Bio.GenBank.Scanner (plus the _FeatureConsumer in Bio.GenBank)
+# However, all the writing code is in this file.
+
+
+class GenBankIterator(SequenceIterator):
+    """Parser for GenBank files."""
+
+    def __init__(self, source):
+        """Break up a Genbank file into SeqRecord objects.
+
+        Argument source is a file-like object opened in text mode or a path to a file.
+        Every section from the LOCUS line to the terminating // becomes
+        a single SeqRecord with associated annotation and features.
+
+        Note that for genomes or chromosomes, there is typically only
+        one record.
+
+        This gets called internally by Bio.SeqIO for the GenBank file format:
+
+        >>> from Bio import SeqIO
+        >>> for record in SeqIO.parse("GenBank/cor6_6.gb", "gb"):
+        ...     print(record.id)
+        ...
+        X55053.1
+        X62281.1
+        M81224.1
+        AJ237582.1
+        L31939.1
+        AF297471.1
+
+        Equivalently,
+
+        >>> with open("GenBank/cor6_6.gb") as handle:
+        ...     for record in GenBankIterator(handle):
+        ...         print(record.id)
+        ...
+        X55053.1
+        X62281.1
+        M81224.1
+        AJ237582.1
+        L31939.1
+        AF297471.1
+
+        """
+        super().__init__(source, mode="t", fmt="GenBank")
+
+    def parse(self, handle):
+        """Start parsing the file, and return a SeqRecord generator."""
+        records = GenBankScanner(debug=0).parse_records(handle)
+        return records
+
+
+class EmblIterator(SequenceIterator):
+    """Parser for EMBL files."""
+
+    def __init__(self, source):
+        """Break up an EMBL file into SeqRecord objects.
+
+        Argument source is a file-like object opened in text mode or a path to a file.
+        Every section from the ID line to the terminating // becomes
+        a single SeqRecord with associated annotation and features.
+
+        Note that for genomes or chromosomes, there is typically only
+        one record.
+
+        This gets called internally by Bio.SeqIO for the EMBL file format:
+
+        >>> from Bio import SeqIO
+        >>> for record in SeqIO.parse("EMBL/epo_prt_selection.embl", "embl"):
+        ...     print(record.id)
+        ...
+        A00022.1
+        A00028.1
+        A00031.1
+        A00034.1
+        A00060.1
+        A00071.1
+        A00072.1
+        A00078.1
+        CQ797900.1
+
+        Equivalently,
+
+        >>> with open("EMBL/epo_prt_selection.embl") as handle:
+        ...     for record in EmblIterator(handle):
+        ...         print(record.id)
+        ...
+        A00022.1
+        A00028.1
+        A00031.1
+        A00034.1
+        A00060.1
+        A00071.1
+        A00072.1
+        A00078.1
+        CQ797900.1
+
+        """
+        super().__init__(source, mode="t", fmt="EMBL")
+
+    def parse(self, handle):
+        """Start parsing the file, and return a SeqRecord generator."""
+        records = EmblScanner(debug=0).parse_records(handle)
+        return records
+
+
+class ImgtIterator(SequenceIterator):
+    """Parser for IMGT files."""
+
+    def __init__(self, source):
+        """Break up an IMGT file into SeqRecord objects.
+
+        Argument source is a file-like object opened in text mode or a path to a file.
+        Every section from the ID line to the terminating // becomes
+        a single SeqRecord with associated annotation and features.
+
+        Note that for genomes or chromosomes, there is typically only
+        one record.
+        """
+        super().__init__(source, mode="t", fmt="IMGT")
+
+    def parse(self, handle):
+        """Start parsing the file, and return a SeqRecord generator."""
+        records = _ImgtScanner(debug=0).parse_records(handle)
+        return records
+
+
+class GenBankCdsFeatureIterator(SequenceIterator):
+    """Parser for GenBank files, creating a SeqRecord for each CDS feature."""
+
+    def __init__(self, source):
+        """Break up a Genbank file into SeqRecord objects for each CDS feature.
+
+        Argument source is a file-like object opened in text mode or a path to a file.
+
+        Every section from the LOCUS line to the terminating // can contain
+        many CDS features.  These are returned as SeqRecord objects with the
+        stated amino acid translation sequence (if given).
+        """
+        super().__init__(source, mode="t", fmt="GenBank")
+
+    def parse(self, handle):
+        """Start parsing the file, and return a SeqRecord generator."""
+        return GenBankScanner(debug=0).parse_cds_features(handle)
+
+
+class EmblCdsFeatureIterator(SequenceIterator):
+    """Parser for EMBL files, creating a SeqRecord for each CDS feature."""
+
+    def __init__(self, source):
+        """Break up a EMBL file into SeqRecord objects for each CDS feature.
+
+        Argument source is a file-like object opened in text mode or a path to a file.
+
+        Every section from the ID line to the terminating // can contain
+        many CDS features.  These are returned as SeqRecord objects with the
+        stated amino acid translation sequence (if given).
+        """
+        super().__init__(source, mode="t", fmt="EMBL")
+
+    def parse(self, handle):
+        """Start parsing the file, and return a SeqRecord generator."""
+        return EmblScanner(debug=0).parse_cds_features(handle)
+
+
+def _insdc_feature_position_string(pos, offset=0):
+    """Build a GenBank/EMBL position string (PRIVATE).
+
+    Use offset=1 to add one to convert a start position from python counting.
+    """
+    if isinstance(pos, SeqFeature.ExactPosition):
+        return "%i" % (pos.position + offset)
+    elif isinstance(pos, SeqFeature.WithinPosition):
+        return "(%i.%i)" % (
+            pos.position + offset,
+            pos.position + pos.extension + offset,
+        )
+    elif isinstance(pos, SeqFeature.BetweenPosition):
+        return "(%i^%i)" % (
+            pos.position + offset,
+            pos.position + pos.extension + offset,
+        )
+    elif isinstance(pos, SeqFeature.BeforePosition):
+        return "<%i" % (pos.position + offset)
+    elif isinstance(pos, SeqFeature.AfterPosition):
+        return ">%i" % (pos.position + offset)
+    elif isinstance(pos, SeqFeature.OneOfPosition):
+        return "one-of(%s)" % ",".join(
+            _insdc_feature_position_string(p, offset) for p in pos.position_choices
+        )
+    elif isinstance(pos, SeqFeature.AbstractPosition):
+        raise NotImplementedError("Please report this as a bug in Biopython.")
+    else:
+        raise ValueError("Expected a SeqFeature position object.")
+
+
+def _insdc_location_string_ignoring_strand_and_subfeatures(location, rec_length):
+    if location.ref:
+        ref = "%s:" % location.ref
+    else:
+        ref = ""
+    assert not location.ref_db
+    if (
+        isinstance(location.start, SeqFeature.ExactPosition)
+        and isinstance(location.end, SeqFeature.ExactPosition)
+        and location.start.position == location.end.position
+    ):
+        # Special case, for 12:12 return 12^13
+        # (a zero length slice, meaning the point between two letters)
+        if location.end.position == rec_length:
+            # Very special case, for a between position at the end of a
+            # sequence (used on some circular genomes, Bug 3098) we have
+            # N:N so return N^1
+            return "%s%i^1" % (ref, rec_length)
+        else:
+            return "%s%i^%i" % (ref, location.end.position, location.end.position + 1)
+    if (
+        isinstance(location.start, SeqFeature.ExactPosition)
+        and isinstance(location.end, SeqFeature.ExactPosition)
+        and location.start.position + 1 == location.end.position
+    ):
+        # Special case, for 11:12 return 12 rather than 12..12
+        # (a length one slice, meaning a single letter)
+        return "%s%i" % (ref, location.end.position)
+    elif isinstance(location.start, SeqFeature.UnknownPosition) or isinstance(
+        location.end, SeqFeature.UnknownPosition
+    ):
+        # Special case for features from SwissProt/UniProt files
+        if isinstance(location.start, SeqFeature.UnknownPosition) and isinstance(
+            location.end, SeqFeature.UnknownPosition
+        ):
+            # warnings.warn("Feature with unknown location", BiopythonWarning)
+            # return "?"
+            raise ValueError("Feature with unknown location")
+        elif isinstance(location.start, SeqFeature.UnknownPosition):
+            # Treat the unknown start position as a BeforePosition
+            return "%s<%i..%s" % (
+                ref,
+                location.nofuzzy_end,
+                _insdc_feature_position_string(location.end),
+            )
+        else:
+            # Treat the unknown end position as an AfterPosition
+            return "%s%s..>%i" % (
+                ref,
+                _insdc_feature_position_string(location.start, +1),
+                location.nofuzzy_start + 1,
+            )
+    else:
+        # Typical case, e.g. 12..15 gets mapped to 11:15
+        return (
+            ref
+            + _insdc_feature_position_string(location.start, +1)
+            + ".."
+            + _insdc_feature_position_string(location.end)
+        )
+
+
+def _insdc_location_string(location, rec_length):
+    """Build a GenBank/EMBL location from a (Compound) FeatureLocation (PRIVATE).
+
+    There is a choice of how to show joins on the reverse complement strand,
+    GenBank used "complement(join(1..10,20..100))" while EMBL used to use
+    "join(complement(20..100),complement(1..10))" instead (but appears to have
+    now adopted the GenBank convention). Notice that the order of the entries
+    is reversed! This function therefore uses the first form. In this situation
+    we expect the CompoundFeatureLocation and its parts to all be marked as
+    strand == -1, and to be in the order 19:100 then 0:10.
+    """
+    try:
+        parts = location.parts
+        # CompoundFeatureLocation
+        if location.strand == -1:
+            # Special case, put complement outside the join/order/... and reverse order
+            return "complement(%s(%s))" % (
+                location.operator,
+                ",".join(
+                    _insdc_location_string_ignoring_strand_and_subfeatures(
+                        p, rec_length
+                    )
+                    for p in parts[::-1]
+                ),
+            )
+        else:
+            return "%s(%s)" % (
+                location.operator,
+                ",".join(_insdc_location_string(p, rec_length) for p in parts),
+            )
+    except AttributeError:
+        # Simple FeatureLocation
+        loc = _insdc_location_string_ignoring_strand_and_subfeatures(
+            location, rec_length
+        )
+        if location.strand == -1:
+            return "complement(%s)" % loc
+        else:
+            return loc
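+
+# Illustrative sketch: a simple reverse-strand location covering Python
+# slice 0:10 is rendered with one-based inclusive INSDC coordinates and
+# wrapped in complement():
+#
+#     from Bio.SeqFeature import FeatureLocation
+#     loc = FeatureLocation(0, 10, strand=-1)
+#     _insdc_location_string(loc, 100)  # "complement(1..10)"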
+
+
+class _InsdcWriter(SequenceWriter):
+    """Base class for GenBank and EMBL writers (PRIVATE)."""
+
+    MAX_WIDTH = 80
+    QUALIFIER_INDENT = 21
+    QUALIFIER_INDENT_STR = " " * QUALIFIER_INDENT
+    QUALIFIER_INDENT_TMP = "     %s                "  # 21 if %s is empty
+    FTQUAL_NO_QUOTE = (
+        "anticodon",
+        "citation",
+        "codon_start",
+        "compare",
+        "direction",
+        "estimated_length",
+        "mod_base",
+        "number",
+        "rpt_type",
+        "rpt_unit_range",
+        "tag_peptide",
+        "transl_except",
+        "transl_table",
+    )
+
+    def _write_feature_qualifier(self, key, value=None, quote=None):
+        if value is None:
+            # Value-less entry like /pseudo
+            self.handle.write("%s/%s\n" % (self.QUALIFIER_INDENT_STR, key))
+            return
+
+        if type(value) == str:
+            value = value.replace(
+                '"', '""'
+            )  # NCBI says escape " as "" in qualifier values
+
+        # Quick hack with no line wrapping, may be useful for testing:
+        # self.handle.write('%s/%s="%s"\n' % (self.QUALIFIER_INDENT_STR, key, value))
+        if quote is None:
+            # Try to mimic unwritten rules about when quotes can be left out:
+            if isinstance(value, int) or key in self.FTQUAL_NO_QUOTE:
+                quote = False
+            else:
+                quote = True
+        if quote:
+            line = '%s/%s="%s"' % (self.QUALIFIER_INDENT_STR, key, value)
+        else:
+            line = "%s/%s=%s" % (self.QUALIFIER_INDENT_STR, key, value)
+        if len(line) <= self.MAX_WIDTH:
+            self.handle.write(line + "\n")
+            return
+        while line.lstrip():
+            if len(line) <= self.MAX_WIDTH:
+                self.handle.write(line + "\n")
+                return
+            # Insert line break...
+            for index in range(
+                min(len(line) - 1, self.MAX_WIDTH), self.QUALIFIER_INDENT + 1, -1
+            ):
+                if line[index] == " ":
+                    break
+            if line[index] != " ":
+                # No nice place to break...
+                index = self.MAX_WIDTH
+            assert index <= self.MAX_WIDTH
+            self.handle.write(line[:index] + "\n")
+            line = self.QUALIFIER_INDENT_STR + line[index:].lstrip()
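+
+    # Illustrative note: keys in FTQUAL_NO_QUOTE and integer values are
+    # written bare (e.g. /codon_start=1) while everything else is quoted
+    # (e.g. /gene="abc"); both are indented by QUALIFIER_INDENT_STR and
+    # wrapped to MAX_WIDTH, preferring to break at spaces.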
+
+    def _wrap_location(self, location):
+        """Split a feature location into lines (break at commas) (PRIVATE)."""
+        # TODO - Rewrite this not to recurse!
+        length = self.MAX_WIDTH - self.QUALIFIER_INDENT
+        if len(location) <= length:
+            return location
+        index = location[:length].rfind(",")
+        if index == -1:
+            # No good place to split (!)
+            warnings.warn("Couldn't split location:\n%s" % location, BiopythonWarning)
+            return location
+        return (
+            location[: index + 1]
+            + "\n"
+            + self.QUALIFIER_INDENT_STR
+            + self._wrap_location(location[index + 1 :])
+        )
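+
+    # Illustrative note: a join() location longer than 59 characters
+    # (MAX_WIDTH minus QUALIFIER_INDENT) is broken after a comma, with each
+    # continuation line indented by QUALIFIER_INDENT_STR, recursing until
+    # every piece fits.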
+
+    def _write_feature(self, feature, record_length):
+        """Write a single SeqFeature object to features table (PRIVATE)."""
+        assert feature.type, feature
+        location = _insdc_location_string(feature.location, record_length)
+        f_type = feature.type.replace(" ", "_")
+        line = (
+            (self.QUALIFIER_INDENT_TMP % f_type)[: self.QUALIFIER_INDENT]
+            + self._wrap_location(location)
+            + "\n"
+        )
+        self.handle.write(line)
+        # Now the qualifiers...
+        # Note as of Biopython 1.69, this is an ordered-dict, don't sort it:
+        for key, values in feature.qualifiers.items():
+            if isinstance(values, (list, tuple)):
+                for value in values:
+                    self._write_feature_qualifier(key, value)
+            else:
+                # String, int, etc - or None for a /pseudo type entry
+                self._write_feature_qualifier(key, values)
+
+    @staticmethod
+    def _get_annotation_str(record, key, default=".", just_first=False):
+        """Get an annotation dictionary entry (as a string) (PRIVATE).
+
+        Some entries are lists, in which case if just_first=True the first entry
+        is returned.  If just_first=False (default) this verifies there is only
+        one entry before returning it.
+        """
+        try:
+            answer = record.annotations[key]
+        except KeyError:
+            return default
+        if isinstance(answer, list):
+            if not just_first:
+                assert len(answer) == 1
+            return str(answer[0])
+        else:
+            return str(answer)
+
+    @staticmethod
+    def _split_multi_line(text, max_len):
+        """Return a list of strings (PRIVATE).
+
+        Any single word which is too long gets returned as a whole line
+        (e.g. URLs) without an exception or warning.
+        """
+        # TODO - Do the line splitting while preserving white space?
+        text = text.strip()
+        if len(text) <= max_len:
+            return [text]
+
+        words = text.split()
+        text = ""
+        while words and len(text) + 1 + len(words[0]) <= max_len:
+            text += " " + words.pop(0)
+            text = text.strip()
+        # assert len(text) <= max_len
+        answer = [text]
+        while words:
+            text = words.pop(0)
+            while words and len(text) + 1 + len(words[0]) <= max_len:
+                text += " " + words.pop(0)
+                text = text.strip()
+            # assert len(text) <= max_len
+            answer.append(text)
+        assert not words
+        return answer
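+
+    # Illustrative sketch: words are packed greedily onto lines of at most
+    # max_len characters, e.g.
+    #
+    #     _InsdcWriter._split_multi_line("alpha beta gamma", 10)
+    #     # -> ["alpha beta", "gamma"]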
+
+    def _split_contig(self, record, max_len):
+        """Return a list of strings, splits on commas (PRIVATE)."""
+        # TODO - Merge this with _write_multi_line method?
+        # It would need the addition of the comma splitting logic...
+        # are there any other cases where that would be sensible?
+        contig = record.annotations.get("contig", "")
+        if isinstance(contig, (list, tuple)):
+            contig = "".join(contig)
+        contig = self.clean(contig)
+        answer = []
+        while contig:
+            if len(contig) > max_len:
+                # Split lines at the commas
+                pos = contig[: max_len - 1].rfind(",")
+                if pos == -1:
+                    raise ValueError("Could not break up CONTIG")
+                text, contig = contig[: pos + 1], contig[pos + 1 :]
+            else:
+                text, contig = contig, ""
+            answer.append(text)
+        return answer
+
+
+class GenBankWriter(_InsdcWriter):
+    """GenBank writer."""
+
+    HEADER_WIDTH = 12
+    QUALIFIER_INDENT = 21
+    STRUCTURED_COMMENT_START = "-START##"
+    STRUCTURED_COMMENT_END = "-END##"
+    STRUCTURED_COMMENT_DELIM = " :: "
+    LETTERS_PER_LINE = 60
+    SEQUENCE_INDENT = 9
+
+    def _write_single_line(self, tag, text):
+        """Write single line in each GenBank record (PRIVATE).
+
+        Used in the 'header' of each GenBank record.
+        """
+        assert len(tag) < self.HEADER_WIDTH
+        if len(text) > self.MAX_WIDTH - self.HEADER_WIDTH:
+            if tag:
+                warnings.warn(
+                    "Annotation %r too long for %r line" % (text, tag), BiopythonWarning
+                )
+            else:
+                # Can't give such a precise warning
+                warnings.warn("Annotation %r too long" % text, BiopythonWarning)
+        self.handle.write(
+            "%s%s\n" % (tag.ljust(self.HEADER_WIDTH), text.replace("\n", " "))
+        )
+
+    def _write_multi_line(self, tag, text):
+        """Write multiple lines in each GenBank record (PRIVATE).
+
+        Used in the 'header' of each GenBank record.
+        """
+        # TODO - Do the line splitting while preserving white space?
+        max_len = self.MAX_WIDTH - self.HEADER_WIDTH
+        lines = self._split_multi_line(text, max_len)
+        self._write_single_line(tag, lines[0])
+        for line in lines[1:]:
+            self._write_single_line("", line)
+
+    def _write_multi_entries(self, tag, text_list):
+        # used for DBLINK and any similar later line types.
+        # If the list of strings is empty, nothing is written.
+        for i, text in enumerate(text_list):
+            if i == 0:
+                self._write_single_line(tag, text)
+            else:
+                self._write_single_line("", text)
+
+    @staticmethod
+    def _get_date(record):
+        default = "01-JAN-1980"
+        try:
+            date = record.annotations["date"]
+        except KeyError:
+            return default
+        # Cope with a list of one string:
+        if isinstance(date, list) and len(date) == 1:
+            date = date[0]
+        if isinstance(date, datetime):
+            date = date.strftime("%d-%b-%Y").upper()
+
+        months = [
+            "JAN",
+            "FEB",
+            "MAR",
+            "APR",
+            "MAY",
+            "JUN",
+            "JUL",
+            "AUG",
+            "SEP",
+            "OCT",
+            "NOV",
+            "DEC",
+        ]
+        if not isinstance(date, str) or len(date) != 11:
+            return default
+        try:
+            datetime(int(date[-4:]), months.index(date[3:6]) + 1, int(date[0:2]))
+        except ValueError:
+            date = default
+        return date
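+
+    # Illustrative sketch ("rec" is a hypothetical SeqRecord): a datetime
+    # annotation is rendered in GenBank's DD-MMM-YYYY form (assuming an
+    # English month abbreviation from strftime), and anything unparseable
+    # falls back to the "01-JAN-1980" default:
+    #
+    #     from datetime import datetime
+    #     rec.annotations["date"] = datetime(2021, 3, 5)
+    #     GenBankWriter._get_date(rec)  # -> "05-MAR-2021"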
+
+    @staticmethod
+    def _get_data_division(record):
+        try:
+            division = record.annotations["data_file_division"]
+        except KeyError:
+            division = "UNK"
+        if division in [
+            "PRI",
+            "ROD",
+            "MAM",
+            "VRT",
+            "INV",
+            "PLN",
+            "BCT",
+            "VRL",
+            "PHG",
+            "SYN",
+            "UNA",
+            "EST",
+            "PAT",
+            "STS",
+            "GSS",
+            "HTG",
+            "HTC",
+            "ENV",
+            "CON",
+            "TSA",
+        ]:
+            # Good, already GenBank style
+            #    PRI - primate sequences
+            #    ROD - rodent sequences
+            #    MAM - other mammalian sequences
+            #    VRT - other vertebrate sequences
+            #    INV - invertebrate sequences
+            #    PLN - plant, fungal, and algal sequences
+            #    BCT - bacterial sequences [plus archaea]
+            #    VRL - viral sequences
+            #    PHG - bacteriophage sequences
+            #    SYN - synthetic sequences
+            #    UNA - unannotated sequences
+            #    EST - EST sequences (expressed sequence tags)
+            #    PAT - patent sequences
+            #    STS - STS sequences (sequence tagged sites)
+            #    GSS - GSS sequences (genome survey sequences)
+            #    HTG - HTGS sequences (high throughput genomic sequences)
+            #    HTC - HTC sequences (high throughput cDNA sequences)
+            #    ENV - Environmental sampling sequences
+            #    CON - Constructed sequences
+            #    TSA - Transcriptome Shotgun Assembly
+            #
+            # (plus UNK for unknown)
+            pass
+        else:
+            # See if this is in EMBL style:
+            #    Division                 Code
+            #    -----------------        ----
+            #    Bacteriophage            PHG - common
+            #    Environmental Sample     ENV - common
+            #    Fungal                   FUN - map to PLN (plants + fungal)
+            #    Human                    HUM - map to PRI (primates)
+            #    Invertebrate             INV - common
+            #    Other Mammal             MAM - common
+            #    Other Vertebrate         VRT - common
+            #    Mus musculus             MUS - map to ROD (rodent)
+            #    Plant                    PLN - common
+            #    Prokaryote               PRO - map to BCT (poor name)
+            #    Other Rodent             ROD - common
+            #    Synthetic                SYN - common
+            #    Transgenic               TGN - ??? map to SYN ???
+            #    Unclassified             UNC - map to UNK
+            #    Viral                    VRL - common
+            #
+            # (plus XXX for submitting which we can map to UNK)
+            embl_to_gbk = {
+                "FUN": "PLN",
+                "HUM": "PRI",
+                "MUS": "ROD",
+                "PRO": "BCT",
+                "UNC": "UNK",
+                "XXX": "UNK",
+            }
+            try:
+                division = embl_to_gbk[division]
+            except KeyError:
+                division = "UNK"
+        assert len(division) == 3
+        return division
+
+    def _get_topology(self, record):
+        """Set the topology to 'circular', 'linear' if defined (PRIVATE)."""
+        max_topology_len = len("circular")
+
+        topology = self._get_annotation_str(record, "topology", default="")
+        if topology and len(topology) <= max_topology_len:
+            return topology.ljust(max_topology_len)
+        else:
+            return " " * max_topology_len
+
+    def _write_the_first_line(self, record):
+        """Write the LOCUS line (PRIVATE)."""
+        locus = record.name
+        if not locus or locus == "":
+            locus = record.id
+        if not locus or locus == "":
+            locus = self._get_annotation_str(record, "accession", just_first=True)
+        if len(locus) > 16:
+            if len(locus) + 1 + len(str(len(record))) > 28:
+                # Locus name and record length too long to squeeze in.
+                # Per updated GenBank standard (Dec 15, 2018) 229.0
+                # the Locus identifier can be any length, and a space
+                # is added after the identifier to keep the identifier
+                # and length fields separated
+                warnings.warn(
+                    "Increasing length of locus line to allow "
+                    "long name. This will result in fields that "
+                    "are not in usual positions.",
+                    BiopythonWarning,
+                )
+
+        if len(locus.split()) > 1:
+            raise ValueError("Invalid whitespace in %r for LOCUS line" % locus)
+        if len(record) > 99999999999:
+            # As of the GenBank release notes 229.0, the locus line can be
+            # any length. However, long locus lines may not be compatible
+            # with all software.
+            warnings.warn(
+                "The sequence length is very long. The LOCUS "
+                "line will be increased in length to compensate. "
+                "This may cause unexpected behavior.",
+                BiopythonWarning,
+            )
+
+        # Get the molecule type
+        mol_type = self._get_annotation_str(record, "molecule_type", None)
+        if mol_type is None:
+            raise ValueError("missing molecule_type in annotations")
+        if mol_type and len(mol_type) > 7:
+            # Deal with common cases from EMBL to GenBank
+            mol_type = mol_type.replace("unassigned ", "").replace("genomic ", "")
+            if len(mol_type) > 7:
+                warnings.warn("Molecule type %r too long" % mol_type, BiopythonWarning)
+                mol_type = "DNA"
+        if mol_type in ["protein", "PROTEIN"]:
+            mol_type = ""
+
+        if mol_type == "":
+            units = "aa"
+        else:
+            units = "bp"
+
+        topology = self._get_topology(record)
+
+        division = self._get_data_division(record)
+
+        # Accommodate longer header, with long accessions and lengths
+        if len(locus) > 16 and len(str(len(record))) > (11 - (len(locus) - 16)):
+            name_length = locus + " " + str(len(record))
+
+        # This is the older, standard 80 position header
+        else:
+            name_length = str(len(record)).rjust(28)
+            name_length = locus + name_length[len(locus) :]
+            assert len(name_length) == 28, name_length
+            assert " " in name_length, name_length
+
+        assert len(units) == 2
+        assert len(division) == 3
+        line = "LOCUS       %s %s    %s %s %s %s\n" % (
+            name_length,
+            units,
+            mol_type.ljust(7),
+            topology,
+            division,
+            self._get_date(record),
+        )
+        # Extra long header
+        if len(line) > 80:
+            splitline = line.split()
+            if splitline[3] not in ["bp", "aa"]:
+                raise ValueError(
+                    "LOCUS line does not contain size units at "
+                    "expected position:\n" + line
+                )
+
+            if not (
+                splitline[4].strip() == ""
+                or "DNA" in splitline[4].strip().upper()
+                or "RNA" in splitline[4].strip().upper()
+            ):
+                raise ValueError(
+                    "LOCUS line does not contain valid "
+                    "sequence type (DNA, RNA, ...):\n" + line
+                )
+
+            self.handle.write(line)
+
+        # 80 position header
+        else:
+            assert len(line) == 79 + 1, repr(line)  # plus one for new line
+
+            # We're bending the rules to allow an identifier over 16 characters
+            # if we can steal spaces from the length field:
+            # assert line[12:28].rstrip() == locus, \
+            #     'LOCUS line does not contain the locus at the expected position:\n' + line
+            # assert line[28:29] == " "
+            # assert line[29:40].lstrip() == str(len(record)), \
+            #     'LOCUS line does not contain the length at the expected position:\n' + line
+            assert line[12:40].split() == [locus, str(len(record))], line
+
+            # Tests copied from Bio.GenBank.Scanner
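+            # For reference, a conforming 80-column LOCUS line (this example
+            # also appears in the DBLINK notes in write_record; the slices
+            # below are 0-based):
+            # LOCUS       NC_000011               1606 bp    DNA     linear   CON 06-JUN-2016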
+            if line[40:44] not in [" bp ", " aa "]:
+                raise ValueError(
+                    "LOCUS line does not contain size units at "
+                    "expected position:\n" + line
+                )
+            if line[44:47] not in ["   ", "ss-", "ds-", "ms-"]:
+                raise ValueError(
+                    "LOCUS line does not have valid strand "
+                    "type (Single stranded, ...):\n" + line
+                )
+            if not (
+                line[47:54].strip() == ""
+                or "DNA" in line[47:54].strip().upper()
+                or "RNA" in line[47:54].strip().upper()
+            ):
+                raise ValueError(
+                    "LOCUS line does not contain valid "
+                    "sequence type (DNA, RNA, ...):\n" + line
+                )
+            if line[54:55] != " ":
+                raise ValueError(
+                    "LOCUS line does not contain space at position 55:\n" + line
+                )
+            if line[55:63].strip() not in ["", "linear", "circular"]:
+                raise ValueError(
+                    "LOCUS line does not contain valid "
+                    "entry (linear, circular, ...):\n" + line
+                )
+            if line[63:64] != " ":
+                raise ValueError(
+                    "LOCUS line does not contain space at position 64:\n" + line
+                )
+            if line[67:68] != " ":
+                raise ValueError(
+                    "LOCUS line does not contain space at position 68:\n" + line
+                )
+            if line[70:71] != "-":
+                raise ValueError(
+                    "LOCUS line does not contain - at position 71 in date:\n" + line
+                )
+            if line[74:75] != "-":
+                raise ValueError(
+                    "LOCUS line does not contain - at position 75 in date:\n" + line
+                )
+
+            self.handle.write(line)
+
+    def _write_references(self, record):
+        number = 0
+        for ref in record.annotations["references"]:
+            if not isinstance(ref, SeqFeature.Reference):
+                continue
+            number += 1
+            data = str(number)
+            # TODO - support more complex record reference locations?
+            if ref.location and len(ref.location) == 1:
+                molecule_type = record.annotations.get("molecule_type")
+                if molecule_type and "protein" in molecule_type:
+                    units = "residues"
+                else:
+                    units = "bases"
+                data += "  (%s %i to %i)" % (
+                    units,
+                    ref.location[0].nofuzzy_start + 1,
+                    ref.location[0].nofuzzy_end,
+                )
+            self._write_single_line("REFERENCE", data)
+            if ref.authors:
+                # We store the AUTHORS data as a single string
+                self._write_multi_line("  AUTHORS", ref.authors)
+            if ref.consrtm:
+                # We store the consortium as a single string
+                self._write_multi_line("  CONSRTM", ref.consrtm)
+            if ref.title:
+                # We store the title as a single string
+                self._write_multi_line("  TITLE", ref.title)
+            if ref.journal:
+                # We store this as a single string - holds the journal name,
+                # volume, year, and page numbers of the citation
+                self._write_multi_line("  JOURNAL", ref.journal)
+            if ref.medline_id:
+                # This line type is obsolete and was removed from the GenBank
+                # flatfile format in April 2005. Should we write it?
+                # Note this has a two space indent:
+                self._write_multi_line("  MEDLINE", ref.medline_id)
+            if ref.pubmed_id:
+                # Note this has a THREE space indent:
+                self._write_multi_line("   PUBMED", ref.pubmed_id)
+            if ref.comment:
+                self._write_multi_line("  REMARK", ref.comment)
+
+    def _write_comment(self, record):
+        # This is a bit complicated due to the range of possible
+        # ways people might have done their annotation...
+        # Currently the parser uses a single string with newlines.
+        # A list of lines is also reasonable.
+        # A single (long) string is perhaps the most natural of all.
+        # This means we may need to deal with line wrapping.
+        lines = []
+        if "structured_comment" in record.annotations:
+            comment = record.annotations["structured_comment"]
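+            # Structured comments are written as delimited key/value blocks,
+            # e.g. (illustrative; the exact delimiters come from the class
+            # constants):
+            #     ##Genome-Assembly-Data-START##
+            #     Assembly Method :: SOAPdenovo v. 1.05
+            #     ##Genome-Assembly-Data-END##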
+            # Find max length of keys for equal padded printing
+            padding = 0
+            for key, data in comment.items():
+                for subkey, subdata in data.items():
+                    padding = len(subkey) if len(subkey) > padding else padding
+            # Construct output
+            for key, data in comment.items():
+                lines.append(f"##{key}{self.STRUCTURED_COMMENT_START}")
+                for subkey, subdata in data.items():
+                    spaces = " " * (padding - len(subkey))
+                    lines.append(
+                        f"{subkey}{spaces}{self.STRUCTURED_COMMENT_DELIM}{subdata}"
+                    )
+                lines.append(f"##{key}{self.STRUCTURED_COMMENT_END}")
+        if "comment" in record.annotations:
+            comment = record.annotations["comment"]
+            if isinstance(comment, str):
+                lines += comment.split("\n")
+            elif isinstance(comment, (list, tuple)):
+                lines += list(comment)
+            else:
+                raise ValueError("Could not understand comment annotation")
+        self._write_multi_line("COMMENT", lines[0])
+        for line in lines[1:]:
+            self._write_multi_line("", line)
+
+    def _write_contig(self, record):
+        max_len = self.MAX_WIDTH - self.HEADER_WIDTH
+        lines = self._split_contig(record, max_len)
+        self._write_single_line("CONTIG", lines[0])
+        for text in lines[1:]:
+            self._write_single_line("", text)
+
+    def _write_sequence(self, record):
+        # Loosely based on code from Howard Salis
+        # TODO - Force lower case?
+
+        if isinstance(record.seq, UnknownSeq):
+            data = None
+        else:
+            try:
+                data = _get_seq_string(record)
+            except UndefinedSequenceError:
+                data = None
+
+        if data is None:
+            # We have already recorded the length, and there is no need
+            # to record a long sequence of NNNNNNN...NNN or whatever.
+            if "contig" in record.annotations:
+                self._write_contig(record)
+            else:
+                self.handle.write("ORIGIN\n")
+            return
+
+        # Catches sequence being None:
+        data = data.lower()
+        seq_len = len(data)
+        self.handle.write("ORIGIN\n")
+        for line_number in range(0, seq_len, self.LETTERS_PER_LINE):
+            self.handle.write(str(line_number + 1).rjust(self.SEQUENCE_INDENT))
+            for words in range(
+                line_number, min(line_number + self.LETTERS_PER_LINE, seq_len), 10
+            ):
+                self.handle.write(" %s" % data[words : words + 10])
+            self.handle.write("\n")
+
+    def write_record(self, record):
+        """Write a single record to the output file."""
+        handle = self.handle
+        self._write_the_first_line(record)
+
+        default = record.id
+        if default.count(".") == 1 and default[default.index(".") + 1 :].isdigit():
+            # Good, looks like accession.version and not something
+            # else like identifier.start-end
+            default = record.id.split(".", 1)[0]
+        accession = self._get_annotation_str(
+            record, "accession", default, just_first=True
+        )
+        acc_with_version = accession
+        if record.id.startswith(accession + "."):
+            try:
+                acc_with_version = "%s.%i" % (
+                    accession,
+                    int(record.id.split(".", 1)[1]),
+                )
+            except ValueError:
+                pass
+        gi = self._get_annotation_str(record, "gi", just_first=True)
+
+        descr = record.description
+        if descr == "":
+            descr = ""  # Trailing dot will be added later
+
+        # The DEFINITION field must end with a period
+        # see ftp://ftp.ncbi.nih.gov/genbank/gbrel.txt [3.4.5]
+        # and discussion https://github.com/biopython/biopython/pull/616
+        # So let's add a period
+        descr += "."
+        self._write_multi_line("DEFINITION", descr)
+
+        self._write_single_line("ACCESSION", accession)
+        if gi != ".":
+            self._write_single_line("VERSION", "%s  GI:%s" % (acc_with_version, gi))
+        else:
+            self._write_single_line("VERSION", "%s" % acc_with_version)
+
+        # The NCBI initially expected two types of link,
+        # e.g. "Project:28471" and "Trace Assembly Archive:123456"
+        #
+        # This changed and at some point the formatting switched to
+        # include a space after the colon, e.g.
+        #
+        # LOCUS       NC_000011               1606 bp    DNA     linear   CON 06-JUN-2016
+        # DEFINITION  Homo sapiens chromosome 11, GRCh38.p7 Primary Assembly.
+        # ACCESSION   NC_000011 REGION: complement(5225466..5227071) GPC_000001303
+        # VERSION     NC_000011.10  GI:568815587
+        # DBLINK      BioProject: PRJNA168
+        #             Assembly: GCF_000001405.33
+        # ...
+        #
+        # Or,
+        #
+        # LOCUS       JU120277                1044 bp    mRNA    linear   TSA 27-NOV-2012
+        # DEFINITION  TSA: Tupaia chinensis tbc000002.Tuchadli mRNA sequence.
+        # ACCESSION   JU120277
+        # VERSION     JU120277.1  GI:379775257
+        # DBLINK      BioProject: PRJNA87013
+        #             Sequence Read Archive: SRR433859
+        # ...
+        dbxrefs_with_space = []
+        for x in record.dbxrefs:
+            if ": " not in x:
+                x = x.replace(":", ": ")
+            dbxrefs_with_space.append(x)
+        self._write_multi_entries("DBLINK", dbxrefs_with_space)
+        del dbxrefs_with_space
+
+        try:
+            # List of strings
+            # Keywords should be given separated with semi colons,
+            keywords = "; ".join(record.annotations["keywords"])
+            # with a trailing period:
+            if not keywords.endswith("."):
+                keywords += "."
+        except KeyError:
+            # If no keywords, there should be just a period:
+            keywords = "."
+        self._write_multi_line("KEYWORDS", keywords)
+
+        if "segment" in record.annotations:
+            # Deal with SEGMENT line found only in segmented records,
+            # e.g. AH000819
+            segment = record.annotations["segment"]
+            if isinstance(segment, list):
+                assert len(segment) == 1, segment
+                segment = segment[0]
+            self._write_single_line("SEGMENT", segment)
+
+        self._write_multi_line("SOURCE", self._get_annotation_str(record, "source"))
+        # The ORGANISM line MUST be a single line, as any continuation is the taxonomy
+        org = self._get_annotation_str(record, "organism")
+        if len(org) > self.MAX_WIDTH - self.HEADER_WIDTH:
+            org = org[: self.MAX_WIDTH - self.HEADER_WIDTH - 4] + "..."
+        self._write_single_line("  ORGANISM", org)
+        try:
+            # List of strings
+            # Taxonomy should be given separated with semi colons,
+            taxonomy = "; ".join(record.annotations["taxonomy"])
+            # with a trailing period:
+            if not taxonomy.endswith("."):
+                taxonomy += "."
+        except KeyError:
+            taxonomy = "."
+        self._write_multi_line("", taxonomy)
+
+        if "db_source" in record.annotations:
+            # Hack around the issue of BioSQL loading a list for the db_source
+            db_source = record.annotations["db_source"]
+            if isinstance(db_source, list):
+                db_source = db_source[0]
+            self._write_single_line("DBSOURCE", db_source)
+
+        if "references" in record.annotations:
+            self._write_references(record)
+
+        if (
+            "comment" in record.annotations
+            or "structured_comment" in record.annotations
+        ):
+            self._write_comment(record)
+
+        handle.write("FEATURES             Location/Qualifiers\n")
+        rec_length = len(record)
+        for feature in record.features:
+            self._write_feature(feature, rec_length)
+        self._write_sequence(record)
+        handle.write("//\n")
+
+
+class EmblWriter(_InsdcWriter):
+    """EMBL writer."""
+
+    HEADER_WIDTH = 5
+    QUALIFIER_INDENT = 21
+    QUALIFIER_INDENT_STR = "FT" + " " * (QUALIFIER_INDENT - 2)
+    QUALIFIER_INDENT_TMP = "FT   %s                "  # 21 if %s is empty
+    # Note second spacer line of just FH is expected:
+    FEATURE_HEADER = "FH   Key             Location/Qualifiers\nFH\n"
+
+    LETTERS_PER_BLOCK = 10
+    BLOCKS_PER_LINE = 6
+    LETTERS_PER_LINE = LETTERS_PER_BLOCK * BLOCKS_PER_LINE
+    POSITION_PADDING = 10
+
+    def _write_contig(self, record):
+        max_len = self.MAX_WIDTH - self.HEADER_WIDTH
+        lines = self._split_contig(record, max_len)
+        for text in lines:
+            self._write_single_line("CO", text)
+
+    def _write_sequence(self, record):
+        handle = self.handle  # save looking up this multiple times
+
+        if isinstance(record.seq, UnknownSeq):
+            data = None
+        else:
+            try:
+                data = _get_seq_string(record)
+            except UndefinedSequenceError:
+                data = None
+
+        if data is None:
+            # We have already recorded the length, and there is no need
+            # to record a long sequence of NNNNNNN...NNN or whatever.
+            if "contig" in record.annotations:
+                self._write_contig(record)
+            else:
+                # TODO - Can the sequence just be left out as in GenBank files?
+                handle.write("SQ   \n")
+            return
+
+        # Catches sequence being None
+        data = data.lower()
+        seq_len = len(data)
+
+        molecule_type = record.annotations.get("molecule_type")
+        if molecule_type is not None and "DNA" in molecule_type:
+            # TODO - What if we have RNA?
+            a_count = data.count("A") + data.count("a")
+            c_count = data.count("C") + data.count("c")
+            g_count = data.count("G") + data.count("g")
+            t_count = data.count("T") + data.count("t")
+            other = seq_len - (a_count + c_count + g_count + t_count)
+            handle.write(
+                "SQ   Sequence %i BP; %i A; %i C; %i G; %i T; %i other;\n"
+                % (seq_len, a_count, c_count, g_count, t_count, other)
+            )
+        else:
+            handle.write("SQ   \n")
+
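+        # Each data line: four leading spaces, six blocks of " " + ten
+        # letters, then the cumulative position right-justified to
+        # POSITION_PADDING columns (4 + 6 * 11 + 10 = 80 columns), e.g.
+        # (illustrative):
+        #     gatcctccat atacaacggt atctccacct caggtttaga tctcaacaac aatatccata        60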
+        for line_number in range(0, seq_len // self.LETTERS_PER_LINE):
+            handle.write("    ")  # Just four, not five
+            for block in range(self.BLOCKS_PER_LINE):
+                index = (
+                    self.LETTERS_PER_LINE * line_number + self.LETTERS_PER_BLOCK * block
+                )
+                handle.write(" %s" % data[index : index + self.LETTERS_PER_BLOCK])
+            handle.write(
+                str((line_number + 1) * self.LETTERS_PER_LINE).rjust(
+                    self.POSITION_PADDING
+                )
+            )
+            handle.write("\n")
+        if seq_len % self.LETTERS_PER_LINE:
+            # Final (partial) line
+            line_number = seq_len // self.LETTERS_PER_LINE
+            handle.write("    ")  # Just four, not five
+            for block in range(self.BLOCKS_PER_LINE):
+                index = (
+                    self.LETTERS_PER_LINE * line_number + self.LETTERS_PER_BLOCK * block
+                )
+                handle.write(
+                    (" %s" % data[index : index + self.LETTERS_PER_BLOCK]).ljust(11)
+                )
+            handle.write(str(seq_len).rjust(self.POSITION_PADDING))
+            handle.write("\n")
+
+    def _write_single_line(self, tag, text):
+        assert len(tag) == 2
+        line = tag + "   " + text
+        if len(text) > self.MAX_WIDTH:
+            warnings.warn("Line %r too long" % line, BiopythonWarning)
+        self.handle.write(line + "\n")
+
+    def _write_multi_line(self, tag, text):
+        max_len = self.MAX_WIDTH - self.HEADER_WIDTH
+        lines = self._split_multi_line(text, max_len)
+        for line in lines:
+            self._write_single_line(tag, line)
+
+    def _write_the_first_lines(self, record):
+        """Write the ID and AC lines (PRIVATE)."""
+        if "." in record.id and record.id.rsplit(".", 1)[1].isdigit():
+            version = "SV " + record.id.rsplit(".", 1)[1]
+            accession = self._get_annotation_str(
+                record, "accession", record.id.rsplit(".", 1)[0], just_first=True
+            )
+        else:
+            version = ""
+            accession = self._get_annotation_str(
+                record, "accession", record.id, just_first=True
+            )
+
+        if ";" in accession:
+            raise ValueError(
+                "Cannot have semi-colon in EMBL accession, '%s'" % accession
+            )
+        if " " in accession:
+            # This is out of practicality... might it be allowed?
+            raise ValueError("Cannot have spaces in EMBL accession, '%s'" % accession)
+
+        topology = self._get_annotation_str(record, "topology", default="")
+
+        # Get the molecule type
+        # TODO - record this explicitly in the parser?
+        # Note often get RNA vs DNA discrepancy in real EMBL/NCBI files
+        mol_type = record.annotations.get("molecule_type")
+        if mol_type is None:
+            raise ValueError("missing molecule_type in annotations")
+        if mol_type not in ("DNA", "RNA", "protein"):
+            warnings.warn("Non-standard molecule type: %s" % mol_type, BiopythonWarning)
+        mol_type_upper = mol_type.upper()
+        if "DNA" in mol_type_upper:
+            units = "BP"
+        elif "RNA" in mol_type_upper:
+            units = "BP"
+        elif "PROTEIN" in mol_type_upper:
+            mol_type = "PROTEIN"
+            units = "AA"
+        else:
+            raise ValueError("failed to understand molecule_type '%s'" % mol_type)
+
+        # Get the taxonomy division
+        division = self._get_data_division(record)
+
+        # TODO - Full ID line
+        handle = self.handle
+        # ID   <1>; SV <2>; <3>; <4>; <5>; <6>; <7> BP.
+        # 1. Primary accession number
+        # 2. Sequence version number
+        # 3. Topology: 'circular' or 'linear'
+        # 4. Molecule type
+        # 5. Data class
+        # 6. Taxonomic division
+        # 7. Sequence length
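+        #
+        # e.g. (illustrative; the data class, field 5, is left empty here):
+        # ID   X56734; SV 1; linear; mRNA; ; PLN; 1859 BP.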
+        self._write_single_line(
+            "ID",
+            "%s; %s; %s; %s; ; %s; %i %s."
+            % (accession, version, topology, mol_type, division, len(record), units),
+        )
+        handle.write("XX\n")
+        self._write_single_line("AC", accession + ";")
+        handle.write("XX\n")
+
+    @staticmethod
+    def _get_data_division(record):
+        try:
+            division = record.annotations["data_file_division"]
+        except KeyError:
+            division = "UNC"
+        if division in [
+            "PHG",
+            "ENV",
+            "FUN",
+            "HUM",
+            "INV",
+            "MAM",
+            "VRT",
+            "MUS",
+            "PLN",
+            "PRO",
+            "ROD",
+            "SYN",
+            "TGN",
+            "UNC",
+            "VRL",
+            "XXX",
+        ]:
+            # Good, already EMBL style
+            #    Division                 Code
+            #    -----------------        ----
+            #    Bacteriophage            PHG
+            #    Environmental Sample     ENV
+            #    Fungal                   FUN
+            #    Human                    HUM
+            #    Invertebrate             INV
+            #    Other Mammal             MAM
+            #    Other Vertebrate         VRT
+            #    Mus musculus             MUS
+            #    Plant                    PLN
+            #    Prokaryote               PRO
+            #    Other Rodent             ROD
+            #    Synthetic                SYN
+            #    Transgenic               TGN
+            #    Unclassified             UNC (i.e. unknown)
+            #    Viral                    VRL
+            #
+            # (plus XXX used for submitting data to EMBL)
+            pass
+        else:
+            # See if this is in GenBank style & can be converted.
+            # Generally a problem as the GenBank groups are wider
+            # than those of EMBL. Note that GenBank use "BCT" for
+            # both bacteria and archaea thus this maps to EMBL's
+            # "PRO" nicely.
+            gbk_to_embl = {"BCT": "PRO", "UNK": "UNC"}
+            try:
+                division = gbk_to_embl[division]
+            except KeyError:
+                division = "UNC"
+        assert len(division) == 3
+        return division
+
+    def _write_keywords(self, record):
+        # Put the keywords right after DE line.
+        # Each 'keyword' can have multiple words and spaces, but we
+        # must not split any 'keyword' between lines.
+        # TODO - Combine short keywords onto one line
+        for keyword in record.annotations["keywords"]:
+            self._write_single_line("KW", keyword)
+        self.handle.write("XX\n")
+
+    def _write_references(self, record):
+        # The order should be RN, RC, RP, RX, RG, RA, RT, RL
+        number = 0
+        for ref in record.annotations["references"]:
+            if not isinstance(ref, SeqFeature.Reference):
+                continue
+            number += 1
+            self._write_single_line("RN", "[%i]" % number)
+            # TODO - support for RC line (needed in parser too)
+            # TODO - support more complex record reference locations?
+            if ref.location and len(ref.location) == 1:
+                self._write_single_line(
+                    "RP",
+                    "%i-%i"
+                    % (ref.location[0].nofuzzy_start + 1, ref.location[0].nofuzzy_end),
+                )
+            # TODO - record any DOI or AGRICOLA identifier in the reference object?
+            if ref.pubmed_id:
+                self._write_single_line("RX", "PUBMED; %s." % ref.pubmed_id)
+            if ref.consrtm:
+                self._write_single_line("RG", "%s" % ref.consrtm)
+            if ref.authors:
+                # We store the AUTHORS data as a single string
+                self._write_multi_line("RA", ref.authors + ";")
+            if ref.title:
+                # We store the title as a single string
+                self._write_multi_line("RT", '"%s";' % ref.title)
+            if ref.journal:
+                # We store this as a single string - holds the journal name,
+                # volume, year, and page numbers of the citation
+                self._write_multi_line("RL", ref.journal)
+            self.handle.write("XX\n")
+
+    def _write_comment(self, record):
+        # This is a bit complicated due to the range of possible
+        # ways people might have done their annotation...
+        # Currently the parser uses a single string with newlines.
+        # A list of lines is also reasonable.
+        # A single (long) string is perhaps the most natural of all.
+        # This means we may need to deal with line wrapping.
+        comment = record.annotations["comment"]
+        if isinstance(comment, str):
+            lines = comment.split("\n")
+        elif isinstance(comment, (list, tuple)):
+            lines = comment
+        else:
+            raise ValueError("Could not understand comment annotation")
+        # TODO - Merge this with the GenBank comment code?
+        if not lines:
+            return
+        for line in lines:
+            self._write_multi_line("CC", line)
+        self.handle.write("XX\n")
+
+    def write_record(self, record):
+        """Write a single record to the output file."""
+        handle = self.handle
+        self._write_the_first_lines(record)
+
+        # PR line (0 or 1 lines only), project identifier
+        #
+        # Assuming can't use 2 lines, we should prefer newer GenBank
+        # DBLINK BioProject:... entries over the older GenBank DBLINK
+        # Project:... lines.
+        #
+        # In either case, seems EMBL uses just "PR    Project:..."
+        # regardless of the type of ID (old numeric only, or new
+        # with alpha prefix), e.g. for CP002497 NCBI now uses:
+        #
+        # DBLINK      BioProject: PRJNA60715
+        #             BioSample: SAMN03081426
+        #
+        # While EMBL uses:
+        #
+        # XX
+        # PR   Project:PRJNA60715;
+        # XX
+        #
+        # Sorting ensures (new) BioProject:... is before old Project:...
+        for xref in sorted(record.dbxrefs):
+            if xref.startswith("BioProject:"):
+                self._write_single_line("PR", xref[3:] + ";")
+                handle.write("XX\n")
+                break
+            if xref.startswith("Project:"):
+                self._write_single_line("PR", xref + ";")
+                handle.write("XX\n")
+                break
+
+        # TODO - DT lines (date)
+
+        descr = record.description
+        if descr == "":
+            descr = "."
+        self._write_multi_line("DE", descr)
+        handle.write("XX\n")
+
+        if "keywords" in record.annotations:
+            self._write_keywords(record)
+
+        # Should this be "source" or "organism"?
+        self._write_multi_line("OS", self._get_annotation_str(record, "organism"))
+        try:
+            # List of strings
+            taxonomy = "; ".join(record.annotations["taxonomy"]) + "."
+        except KeyError:
+            taxonomy = "."
+        self._write_multi_line("OC", taxonomy)
+        handle.write("XX\n")
+
+        if "references" in record.annotations:
+            self._write_references(record)
+
+        if "comment" in record.annotations:
+            self._write_comment(record)
+
+        handle.write(self.FEATURE_HEADER)
+        rec_length = len(record)
+        for feature in record.features:
+            self._write_feature(feature, rec_length)
+        handle.write("XX\n")
+
+        self._write_sequence(record)
+        handle.write("//\n")
+
+
+class ImgtWriter(EmblWriter):
+    """IMGT writer (EMBL format variant)."""
+
+    HEADER_WIDTH = 5
+    QUALIFIER_INDENT = 25  # Not 21 as in EMBL
+    QUALIFIER_INDENT_STR = "FT" + " " * (QUALIFIER_INDENT - 2)
+    QUALIFIER_INDENT_TMP = "FT   %s                    "  # 25 if %s is empty
+    FEATURE_HEADER = "FH   Key                 Location/Qualifiers\nFH\n"
+
+
+def _genbank_convert_fasta(in_file, out_file):
+    """Fast GenBank to FASTA (PRIVATE)."""
+    # We don't need to parse the features...
+    records = GenBankScanner().parse_records(in_file, do_features=False)
+    return SeqIO.write(records, out_file, "fasta")
+
+
+def _embl_convert_fasta(in_file, out_file):
+    """Fast EMBL to FASTA (PRIVATE)."""
+    # We don't need to parse the features...
+    records = EmblScanner().parse_records(in_file, do_features=False)
+    return SeqIO.write(records, out_file, "fasta")
+
+
+if __name__ == "__main__":
+    from Bio._utils import run_doctest
+
+    run_doctest(verbose=0)
diff --git a/code/lib/Bio/SeqIO/Interfaces.py b/code/lib/Bio/SeqIO/Interfaces.py
new file mode 100644
index 0000000..e10b923
--- /dev/null
+++ b/code/lib/Bio/SeqIO/Interfaces.py
@@ -0,0 +1,376 @@
+# Copyright 2006-2013 by Peter Cock.  All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Bio.SeqIO support module (not for general use).
+
+Unless you are writing a new parser or writer for Bio.SeqIO, you should not
+use this module.  It provides base classes to try and simplify things.
+"""
+import warnings
+
+from abc import ABC
+from abc import abstractmethod
+
+from Bio import BiopythonDeprecationWarning
+from Bio import StreamModeError
+from Bio.Seq import MutableSeq
+from Bio.Seq import Seq
+from Bio.SeqRecord import SeqRecord
+
+
+class SequenceIterator(ABC):
+    """Base class for building SeqRecord iterators.
+
+    You should write a parse method that returns a SeqRecord generator.  You
+    may wish to redefine the __init__ method as well.
+    """
+
+    def __init__(self, source, alphabet=None, mode="t", fmt=None):
+        """Create a SequenceIterator object.
+
+        Arguments:
+        - source - input file stream, or path to input file
+        - alphabet - no longer used, should be None
+
+        This method MAY be overridden by any subclass.
+
+        Note when subclassing:
+        - there should be a single non-optional argument, the source.
+        - you do not have to require an alphabet.
+        - you can add additional optional arguments.
+        """
+        if alphabet is not None:
+            raise ValueError("The alphabet argument is no longer supported")
+        try:
+            self.stream = open(source, "r" + mode)
+            self.should_close_stream = True
+        except TypeError:  # not a path, assume we received a stream
+            if mode == "t":
+                if source.read(0) != "":
+                    raise StreamModeError(
+                        "%s files must be opened in text mode." % fmt
+                    ) from None
+            elif mode == "b":
+                if source.read(0) != b"":
+                    raise StreamModeError(
+                        "%s files must be opened in binary mode." % fmt
+                    ) from None
+            else:
+                raise ValueError("Unknown mode '%s'" % mode) from None
+            self.stream = source
+            self.should_close_stream = False
+        try:
+            self.records = self.parse(self.stream)
+        except Exception:
+            if self.should_close_stream:
+                self.stream.close()
+            raise
+
+    def __next__(self):
+        try:
+            return next(self.records)
+        except Exception:
+            if self.should_close_stream:
+                self.stream.close()
+            raise
+
+    def __iter__(self):
+        """Iterate over the entries as a SeqRecord objects.
+
+        Example usage for Fasta files::
+
+            with open("example.fasta","r") as myFile:
+                myFastaReader = FastaIterator(myFile)
+                for record in myFastaReader:
+                    print(record.id)
+                    print(record.seq)
+
+        This method SHOULD NOT be overridden by any subclass. It should be
+        left as is, which will call the subclass implementation of __next__
+        to actually parse the file.
+        """
+        return self
+
+    @abstractmethod
+    def parse(self, handle):
+        """Start parsing the file, and return a SeqRecord iterator."""
+
+
+def _get_seq_string(record):
+    """Use this to catch errors like the sequence being None (PRIVATE)."""
+    if not isinstance(record, SeqRecord):
+        raise TypeError("Expected a SeqRecord object")
+    if record.seq is None:
+        raise TypeError("SeqRecord (id=%s) has None for its sequence." % record.id)
+    elif not isinstance(record.seq, (Seq, MutableSeq)):
+        raise TypeError("SeqRecord (id=%s) has an invalid sequence." % record.id)
+    return str(record.seq)
+
+
+# Function variant of the SequenceWriter method.
+def _clean(text):
+    """Use this to avoid getting newlines in the output (PRIVATE)."""
+    return text.replace("\n", " ").replace("\r", " ")
+
+
+class SequenceWriter:
+    """Base class for sequence writers. This class should be subclassed.
+
+    It is intended for sequential file formats with an (optional)
+    header, repeated records, and an (optional) footer, as well
+    as for interlaced file formats such as Clustal.
+
+    The user may call the write_file() method to write a complete
+    file containing the sequences.
+
+    Alternatively, users may call the write_header(), followed
+    by multiple calls to write_record() and/or write_records(),
+    followed finally by write_footer().
+
+    Note that write_header() cannot require any assumptions about
+    the number of records.
+    """
+
+    def __init__(self, target, mode="w"):
+        """Create the writer object."""
+        if mode == "w":
+            try:
+                target.write("")
+            except TypeError:
+                # target was opened in binary mode
+                raise StreamModeError("File must be opened in text mode.") from None
+            except AttributeError:
+                # target is a path
+                handle = open(target, mode)
+            else:
+                handle = target
+        elif mode == "wb":
+            try:
+                target.write(b"")
+            except TypeError:
+                # target was opened in text mode
+                raise StreamModeError("File must be opened in binary mode.") from None
+            except AttributeError:
+                # target is a path
+                handle = open(target, mode)
+            else:
+                handle = target
+        else:
+            raise RuntimeError("Unknown mode '%s'" % mode)
+
+        self._target = target
+        self.handle = handle
+
+    def clean(self, text):
+        """Use this to avoid getting newlines in the output."""
+        return text.replace("\n", " ").replace("\r", " ")
+
+    def write_header(self):
+        """Write the file header to the output file."""
+        pass
+        ##################################################
+        # You MUST implement this method in the subclass #
+        # if the file format defines a file header.      #
+        ##################################################
+
+    def write_footer(self):
+        """Write the file footer to the output file."""
+        pass
+        ##################################################
+        # You MUST implement this method in the subclass #
+        # if the file format defines a file footer.      #
+        ##################################################
+
+    def write_record(self, record):
+        """Write a single record to the output file.
+
+        record - a SeqRecord object
+        """
+        raise NotImplementedError("This method should be implemented")
+        ##################################################
+        # You MUST implement this method in the subclass #
+        # for sequential file formats.                   #
+        ##################################################
+
+    def write_records(self, records, maxcount=None):
+        """Write records to the output file, and return the number of records.
+
+        records - A list or iterator returning SeqRecord objects
+        maxcount - The maximum number of records allowed by the
+        file format, or None if there is no maximum.
+        """
+        count = 0
+        if maxcount is None:
+            for record in records:
+                self.write_record(record)
+                count += 1
+        else:
+            for record in records:
+                if count == maxcount:
+                    if maxcount == 1:
+                        raise ValueError("More than one sequence found")
+                    else:
+                        raise ValueError(
+                            "Number of sequences is larger than %d" % maxcount
+                        )
+                self.write_record(record)
+                count += 1
+        return count
+
+    def write_file(self, records, mincount=0, maxcount=None):
+        """Write a complete file with the records, and return the number of records.
+
+        records - A list or iterator returning SeqRecord objects
+        """
+        ##################################################
+        # You MUST implement this method in the subclass #
+        # for interlaced file formats.                   #
+        ##################################################
+        try:
+            self.write_header()
+            count = self.write_records(records, maxcount)
+            self.write_footer()
+        finally:
+            if self.handle is not self._target:
+                self.handle.close()
+        if count < mincount:
+            if mincount == 1:  # Common case
+                raise ValueError("Must have one sequence")
+            elif mincount == maxcount:
+                raise ValueError(
+                    "Number of sequences is %d (expected %d)" % (count, mincount)
+                )
+            else:
+                raise ValueError(
+                    "Number of sequences is %d (expected at least %d)"
+                    % (count, mincount)
+                )
+        return count
+
+
+class SequentialSequenceWriter(SequenceWriter):
+    """Base class for sequential sequence writers (DEPRECATED).
+
+    This class should be subclassed. It is no longer used.
+    It was intended for sequential file formats with an (optional)
+    header, repeated records, and an (optional) footer. It would
+    enforce calling the methods in the appropriate order. To update
+    code using ``SequentialSequenceWriter``, just subclass
+    ``SequenceWriter`` and drop the ``._header_written`` etc
+    checks (or reimplement them).
+
+    In this case (as with interlaced file formats), the user may
+    simply call the write_file() method and be done.
+
+    However, they may also call the write_header(), followed
+    by multiple calls to write_record() and/or write_records()
+    followed finally by write_footer().
+
+    Users must call write_header() and write_footer() even when
+    the file format concerned doesn't have a header or footer.
+    This is to try and make life as easy as possible when
+    switching the output format.
+
+    Note that write_header() cannot require any assumptions about
+    the number of records.
+    """
+
+    def __init__(self, target, mode="w"):
+        """Initialize the class."""
+        super().__init__(target, mode)
+        self._header_written = False
+        self._record_written = False
+        self._footer_written = False
+        warnings.warn(
+            "SequentialSequenceWriter has been deprecated, any class "
+            "subclassing it will need to subclass SequenceWriter instead.",
+            BiopythonDeprecationWarning,
+        )
+
+    def write_header(self):
+        """Write the file header.
+
+        If your file format defines a header, you should implement this method
+        in order to write the header before any of the records.
+
+        The default implementation checks the private attribute ._header_written
+        to ensure the header is only written once.
+        """
+        assert not self._header_written, "You have already called write_header()"
+        assert (
+            not self._record_written
+        ), "You have already called write_record() or write_records()"
+        assert not self._footer_written, "You have already called write_footer()"
+        self._header_written = True
+
+    def write_footer(self):
+        """Write the file footer.
+
+        If your file format defines a footer, you should implement this method
+        in order to write the footer after all the records.
+
+        The default implementation checks the private attribute ._footer_written
+        to ensure the footer is only written once.
+        """
+        assert self._header_written, "You must call write_header() first"
+        assert (
+            self._record_written
+        ), "You have not called write_record() or write_records() yet"
+        assert not self._footer_written, "You have already called write_footer()"
+        self._footer_written = True
+
+    def write_record(self, record):
+        """Write a single record to the output file.
+
+        record - a SeqRecord object
+
+        Once you have called write_header() you can call write_record()
+        and/or write_records() as many times as needed.  Then call
+        write_footer() and close().
+        """
+        assert self._header_written, "You must call write_header() first"
+        assert not self._footer_written, "You have already called write_footer()"
+        self._record_written = True
+        raise NotImplementedError("This object should be subclassed")
+
+    def write_records(self, records):
+        """Write multiple record to the output file.
+
+        records - A list or iterator returning SeqRecord objects
+
+        Once you have called write_header() you can call write_record()
+        and/or write_records() as many times as needed.  Then call
+        write_footer() and close().
+
+        Returns the number of records written.
+        """
+        # Default implementation:
+        assert self._header_written, "You must call write_header() first"
+        assert not self._footer_written, "You have already called write_footer()"
+        count = 0
+        for record in records:
+            self.write_record(record)
+            count += 1
+        # Mark as true, even if there were no records
+        self._record_written = True
+        return count
+
+    def write_file(self, records):
+        """Use this to write an entire file containing the given records.
+
+        records - A list or iterator returning SeqRecord objects
+
+        This method can only be called once.  Returns the number of records
+        written.
+        """
+        try:
+            self.write_header()
+            count = self.write_records(records)
+            self.write_footer()
+        finally:
+            if self.handle is not self._target:
+                self.handle.close()
+        return count
diff --git a/code/lib/Bio/SeqIO/NibIO.py b/code/lib/Bio/SeqIO/NibIO.py
new file mode 100644
index 0000000..8c6e84c
--- /dev/null
+++ b/code/lib/Bio/SeqIO/NibIO.py
@@ -0,0 +1,170 @@
+# Copyright 2019 by Michiel de Hoon.  All rights reserved.
+# Based on code contributed and copyright 2016 by Peter Cock.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Bio.SeqIO support for the UCSC nib file format.
+
+Nib stands for nibble (4 bit) representation of nucleotide sequences.
+The two nibbles in a byte each store one nucleotide, represented numerically
+as follows:
+
+    - ``0`` - T
+    - ``1`` - C
+    - ``2`` - A
+    - ``3`` - G
+    - ``4`` - N (unknown)
+
+As the first bit in a nibble is set if the nucleotide is soft-masked, we
+additionally have:
+
+    - ``8`` - t
+    - ``9`` - c
+    - ``a`` - a
+    - ``b`` - g
+    - ``c`` - n (unknown)
+
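+For example, the sequence ``TCag`` is stored (after the header) as the two
+bytes ``0x01`` and ``0xab``, with the first nucleotide of each pair in the
+high nibble.
+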
+A nib file contains only one sequence record.
+You are expected to use this module via the Bio.SeqIO functions under
+the format name "nib":
+
+    >>> from Bio import SeqIO
+    >>> record = SeqIO.read("Nib/test_even_bigendian.nib", "nib")
+    >>> print("%i %s..." % (len(record), record.seq[:20]))
+    50 nAGAAGagccgcNGgCActt...
+
+For detailed information on the file format, please see the UCSC
+description at https://genome.ucsc.edu/FAQ/FAQformat.html.
+"""
+import binascii
+import struct
+import sys
+
+from Bio.Seq import Seq
+from Bio.SeqRecord import SeqRecord
+
+from .Interfaces import SequenceIterator
+from .Interfaces import SequenceWriter
+
+
+class NibIterator(SequenceIterator):
+    """Parser for nib files."""
+
+    def __init__(self, source):
+        """Iterate over a nib file and yield a SeqRecord.
+
+            - source - a file-like object or a path to a file in the nib file
+              format as defined by UCSC; the file must be opened in binary mode.
+
+        Note that a nib file always contains only one sequence record.
+        The sequence of the resulting SeqRecord object should match the sequence
+        generated by Jim Kent's nibFrag utility run with the -masked option.
+
+        This function is used internally via the Bio.SeqIO functions:
+
+        >>> from Bio import SeqIO
+        >>> record = SeqIO.read("Nib/test_even_bigendian.nib", "nib")
+        >>> print("%s %i" % (record.seq, len(record)))
+        nAGAAGagccgcNGgCActtGAnTAtCGTCgcCacCaGncGncTtGNtGG 50
+
+        You can also call it directly:
+
+        >>> with open("Nib/test_even_bigendian.nib", "rb") as handle:
+        ...     for record in NibIterator(handle):
+        ...         print("%s %i" % (record.seq, len(record)))
+        ...
+        nAGAAGagccgcNGgCActtGAnTAtCGTCgcCacCaGncGncTtGNtGG 50
+
+        """
+        super().__init__(source, mode="b", fmt="Nib")
+
+    def parse(self, handle):
+        """Start parsing the file, and return a SeqRecord generator."""
+        word = handle.read(4)
+        if not word:
+            raise ValueError("Empty file.")
+        signature = word.hex()
+        if signature == "3a3de96b":
+            byteorder = "little"  # little-endian
+        elif signature == "6be93d3a":
+            byteorder = "big"  # big-endian
+        else:
+            raise ValueError("unexpected signature in nib header")
+        records = self.iterate(handle, byteorder)
+        return records
+
+    def iterate(self, handle, byteorder):
+        """Iterate over the records in the nib file."""
+        number = handle.read(4)
+        length = int.from_bytes(number, byteorder)
+        data = handle.read()
+        indices = binascii.hexlify(data)
+        if length % 2 == 0:
+            if len(indices) != length:
+                raise ValueError("Unexpected file size")
+        elif length % 2 == 1:
+            if len(indices) != length + 1:
+                raise ValueError("Unexpected file size")
+            indices = indices[:length]
+        if not set(indices).issubset(b"0123489abc"):
+            raise ValueError("Unexpected sequence data found in file")
+        table = bytes.maketrans(b"0123489abc", b"TCAGNtcagn")
+        nucleotides = indices.translate(table)
+        sequence = Seq(nucleotides)
+        record = SeqRecord(sequence)
+        yield record
+
+
+class NibWriter(SequenceWriter):
+    """Nib file writer."""
+
+    def __init__(self, target):
+        """Initialize a Nib writer object.
+
+        Arguments:
+         - target - output stream opened in binary mode, or a path to a file
+
+        """
+        super().__init__(target, mode="wb")
+
+    def write_header(self):
+        """Write the file header."""
+        super().write_header()
+        handle = self.handle
+        byteorder = sys.byteorder
+        if byteorder == "little":  # little-endian
+            signature = "3a3de96b"
+        elif byteorder == "big":  # big-endian
+            signature = "6be93d3a"
+        else:
+            raise RuntimeError("unexpected system byte order %s" % byteorder)
+        handle.write(bytes.fromhex(signature))
+
+    def write_record(self, record):
+        """Write a single record to the output file."""
+        handle = self.handle
+        sequence = record.seq
+        nucleotides = bytes(sequence)
+        length = len(sequence)
+        handle.write(struct.pack("i", length))
+        table = bytes.maketrans(b"TCAGNtcagn", b"0123489abc")
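+        # Odd-length sequences get one padding "T" (nibble 0) so the hex
+        # string below has an even number of digits; the reader uses the
+        # stored length to drop the padding again.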
+        padding = length % 2
+        suffix = padding * b"T"
+        nucleotides += suffix
+        if not set(nucleotides).issubset(b"ACGTNacgtn"):
+            raise ValueError("Sequence should contain A,C,G,T,N,a,c,g,t,n only")
+        indices = nucleotides.translate(table)
+        handle.write(binascii.unhexlify(indices))
+
+    def write_file(self, records):
+        """Write the complete file with the records, and return the number of records."""
+        count = super().write_file(records, mincount=1, maxcount=1)
+        return count
+
+
+if __name__ == "__main__":
+    from Bio._utils import run_doctest
+
+    run_doctest(verbose=0)
diff --git a/code/lib/Bio/SeqIO/PdbIO.py b/code/lib/Bio/SeqIO/PdbIO.py
new file mode 100644
index 0000000..a5f0bf2
--- /dev/null
+++ b/code/lib/Bio/SeqIO/PdbIO.py
@@ -0,0 +1,515 @@
+# Copyright 2012 by Eric Talevich.  All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Bio.SeqIO support for accessing sequences in PDB and mmCIF files."""
+import collections
+import warnings
+
+from Bio import BiopythonParserWarning
+from Bio.Data.IUPACData import protein_letters_3to1_extended as iupac_3to1_ext
+from Bio.Data.SCOPData import protein_letters_3to1 as scop_3to1
+from Bio.Seq import Seq
+from Bio.SeqRecord import SeqRecord
+
+from .Interfaces import SequenceIterator
+
+
+_aa3to1_dict = {}
+_aa3to1_dict.update(iupac_3to1_ext)
+_aa3to1_dict.update(scop_3to1)
+
+
+def _res2aacode(residue, undef_code="X"):
+    """Return the one-letter amino acid code from the residue name.
+
+    Non-amino-acid residues are returned as "X".
+    """
+    if isinstance(residue, str):
+        return _aa3to1_dict.get(residue, undef_code)
+
+    return _aa3to1_dict.get(residue.resname, undef_code)
+
+
+def AtomIterator(pdb_id, structure):
+    """Return SeqRecords from Structure objects.
+
+    Base function for sequence parsers that read structures via Bio.PDB parsers.
+
+    Once a parser from Bio.PDB has been used to load a structure into a
+    Bio.PDB.Structure.Structure object, there is no difference in how the
+    sequence parser interprets the residue sequence. The functions in this
+    module may be used by SeqIO modules wishing to parse sequences from lists
+    of residues.
+
+    Calling functions must pass a Bio.PDB.Structure.Structure object.
+
+    See Bio.SeqIO.PdbIO.PdbAtomIterator and Bio.SeqIO.PdbIO.CifAtomIterator for
+    details.
+    """
+    model = structure[0]
+    for chn_id, chain in sorted(model.child_dict.items()):
+        # HETATM mod. res. policy: remove mod if in sequence, else discard
+        residues = [
+            res
+            for res in chain.get_unpacked_list()
+            if _res2aacode(res.get_resname().upper()) != "X"
+        ]
+        if not residues:
+            continue
+        # Identify missing residues in the structure
+        # (fill the sequence with 'X' residues in these regions)
+        gaps = []
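+        # Each gap is recorded as (index of the first post-gap residue in
+        # the residues list, last residue number before the gap, first
+        # residue number after it); e.g. residue numbers 1,2,3,7,8 give a
+        # single entry (3, 3, 7), and the gap is filled with "XXX".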
+        rnumbers = [r.id[1] for r in residues]
+        for i, rnum in enumerate(rnumbers[:-1]):
+            if rnumbers[i + 1] != rnum + 1 and rnumbers[i + 1] != rnum:
+                # It's a gap!
+                gaps.append((i + 1, rnum, rnumbers[i + 1]))
+        if gaps:
+            res_out = []
+            prev_idx = 0
+            for i, pregap, postgap in gaps:
+                if postgap > pregap:
+                    gapsize = postgap - pregap - 1
+                    res_out.extend(_res2aacode(x) for x in residues[prev_idx:i])
+                    prev_idx = i
+                    res_out.append("X" * gapsize)
+                else:
+                    warnings.warn(
+                        "Ignoring out-of-order residues after a gap",
+                        BiopythonParserWarning,
+                    )
+                    # Keep the normal part, drop the out-of-order segment
+                    # (presumably modified or hetatm residues, e.g. 3BEG)
+                    res_out.extend(_res2aacode(x) for x in residues[prev_idx:i])
+                    break
+            else:
+                # Last segment
+                res_out.extend(_res2aacode(x) for x in residues[prev_idx:])
+        else:
+            # No gaps
+            res_out = [_res2aacode(x) for x in residues]
+        record_id = "%s:%s" % (pdb_id, chn_id)
+        # ENH - model number in SeqRecord id if multiple models?
+        # id = "Chain%s" % str(chain.id)
+        # if len(structure) > 1 :
+        #     id = ("Model%s|" % str(model.id)) + id
+
+        record = SeqRecord(Seq("".join(res_out)), id=record_id, description=record_id)
+        # TODO: Test PDB files with DNA and RNA too:
+        record.annotations["molecule_type"] = "protein"
+
+        record.annotations["model"] = model.id
+        record.annotations["chain"] = chain.id
+
+        record.annotations["start"] = int(rnumbers[0])
+        record.annotations["end"] = int(rnumbers[-1])
+        yield record
+
+
+class PdbSeqresIterator(SequenceIterator):
+    """Parser for PDB files."""
+
+    def __init__(self, source):
+        """Return SeqRecord objects for each chain in a PDB file.
+
+        Arguments:
+         - source - input stream opened in text mode, or a path to a file
+
+        The sequences are derived from the SEQRES lines in the
+        PDB file header, not the atoms of the 3D structure.
+
+        Specifically, the SEQRES and DBREF records are handled (SEQADV and
+        MODRES are not yet supported).
+
+        See: http://www.wwpdb.org/documentation/format23/sect3.html
+
+        This gets called internally via Bio.SeqIO for the SEQRES based interpretation
+        of the PDB file format:
+
+        >>> from Bio import SeqIO
+        >>> for record in SeqIO.parse("PDB/1A8O.pdb", "pdb-seqres"):
+        ...     print("Record id %s, chain %s" % (record.id, record.annotations["chain"]))
+        ...     print(record.dbxrefs)
+        ...
+        Record id 1A8O:A, chain A
+        ['UNP:P12497', 'UNP:POL_HV1N5']
+
+        Equivalently,
+
+        >>> with open("PDB/1A8O.pdb") as handle:
+        ...     for record in PdbSeqresIterator(handle):
+        ...         print("Record id %s, chain %s" % (record.id, record.annotations["chain"]))
+        ...         print(record.dbxrefs)
+        ...
+        Record id 1A8O:A, chain A
+        ['UNP:P12497', 'UNP:POL_HV1N5']
+
+        Note the chain is recorded in the annotations dictionary, and any PDB DBREF
+        lines are recorded in the database cross-references list.
+        """
+        super().__init__(source, mode="t", fmt="PDB")
+
+    def parse(self, handle):
+        """Start parsing the file, and return a SeqRecord generator."""
+        records = self.iterate(handle)
+        return records
+
+    def iterate(self, handle):
+        """Iterate over the records in the PDB file."""
+        chains = collections.defaultdict(list)
+        metadata = collections.defaultdict(list)
+
+        rec_name = None
+        for line in handle:
+            rec_name = line[0:6].strip()
+            if rec_name == "SEQRES":
+                # NB: We only actually need chain ID and the residues here;
+                # commented bits are placeholders from the wwPDB spec.
+                # Serial number of the SEQRES record for the current chain.
+                # Starts at 1 and increments by one each line.
+                # Reset to 1 for each chain.
+                # ser_num = int(line[8:10])
+                # Chain identifier. This may be any single legal character,
+                # including a blank which is used if there is only one chain.
+                chn_id = line[11]
+                # Number of residues in the chain (repeated on every record)
+                # num_res = int(line[13:17])
+                residues = [_res2aacode(res) for res in line[19:].split()]
+                chains[chn_id].extend(residues)
+            elif rec_name == "DBREF":
+                #  ID code of this entry (PDB ID)
+                pdb_id = line[7:11]
+                # Chain identifier.
+                chn_id = line[12]
+                # Initial sequence number of the PDB sequence segment.
+                # seq_begin = int(line[14:18])
+                # Initial insertion code of the PDB sequence segment.
+                # icode_begin = line[18]
+                # Ending sequence number of the PDB sequence segment.
+                # seq_end = int(line[20:24])
+                # Ending insertion code of the PDB sequence segment.
+                # icode_end = line[24]
+                # Sequence database name.
+                database = line[26:32].strip()
+                # Sequence database accession code.
+                db_acc = line[33:41].strip()
+                # Sequence database identification code.
+                db_id_code = line[42:54].strip()
+                # Initial sequence number of the database segment.
+                # db_seq_begin = int(line[55:60])
+                # Insertion code of initial residue of the segment, if PDB is the
+                # reference.
+                # db_icode_begin = line[60]
+                # Ending sequence number of the database segment.
+                # db_seq_end = int(line[62:67])
+                # Insertion code of the ending residue of the segment, if PDB is the
+                # reference.
+                # db_icode_end = line[67]
+                metadata[chn_id].append(
+                    {
+                        "pdb_id": pdb_id,
+                        "database": database,
+                        "db_acc": db_acc,
+                        "db_id_code": db_id_code,
+                    }
+                )
+            # ENH: 'SEQADV' 'MODRES'
+
+        if rec_name is None:
+            raise ValueError("Empty file.")
+
+        for chn_id, residues in sorted(chains.items()):
+            record = SeqRecord(Seq("".join(residues)))
+            record.annotations = {"chain": chn_id}
+            # TODO: Test PDB files with DNA and RNA too:
+            record.annotations["molecule_type"] = "protein"
+            if chn_id in metadata:
+                m = metadata[chn_id][0]
+                record.id = record.name = "%s:%s" % (m["pdb_id"], chn_id)
+                record.description = "%s:%s %s" % (
+                    m["database"],
+                    m["db_acc"],
+                    m["db_id_code"],
+                )
+                for melem in metadata[chn_id]:
+                    record.dbxrefs.extend(
+                        [
+                            "%s:%s" % (melem["database"], melem["db_acc"]),
+                            "%s:%s" % (melem["database"], melem["db_id_code"]),
+                        ]
+                    )
+            else:
+                record.id = chn_id
+            yield record
+
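+
+# Illustrative sketch (not part of the original module): SEQRES records use
+# fixed columns, so the chain ID sits at index 11 and the residue names
+# start at index 19, exactly as sliced in iterate() above. The sample line
+# below is hypothetical and truncated.
+def _demo_seqres_columns():
+    line = "SEQRES   1 A   14  GLY ILE VAL GLU GLN CYS CYS THR SER ILE"
+    assert line[11] == "A"
+    assert line[19:].split()[:3] == ["GLY", "ILE", "VAL"]
+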
+
+def PdbAtomIterator(source):
+    """Return SeqRecord objects for each chain in a PDB file.
+
+    Argument source is a file-like object or a path to a file.
+
+    The sequences are derived from the 3D structure (ATOM records), not the
+    SEQRES lines in the PDB file header.
+
+    Unrecognised three letter amino acid codes (e.g. "CSD") from HETATM entries
+    are converted to "X" in the sequence.
+
+    In addition to information from the PDB header (which is the same for all
+    records), the following chain specific information is placed in the
+    annotation:
+
+    record.annotations["residues"] = List of residue ID strings
+    record.annotations["chain"] = Chain ID (typically A, B ,...)
+    record.annotations["model"] = Model ID (typically zero)
+
+    Where amino acids are missing from the structure, as indicated by residue
+    numbering, the sequence is filled in with 'X' characters to match the size
+    of the missing region, and None is included as the corresponding entry in
+    the list record.annotations["residues"].
+
+    This function uses the Bio.PDB module to do most of the hard work. The
+    annotation information could be improved but this extra parsing should be
+    done in parse_pdb_header, not this module.
+
+    This gets called internally via Bio.SeqIO for the atom based interpretation
+    of the PDB file format:
+
+    >>> from Bio import SeqIO
+    >>> for record in SeqIO.parse("PDB/1A8O.pdb", "pdb-atom"):
+    ...     print("Record id %s, chain %s" % (record.id, record.annotations["chain"]))
+    ...
+    Record id 1A8O:A, chain A
+
+    Equivalently,
+
+    >>> with open("PDB/1A8O.pdb") as handle:
+    ...     for record in PdbAtomIterator(handle):
+    ...         print("Record id %s, chain %s" % (record.id, record.annotations["chain"]))
+    ...
+    Record id 1A8O:A, chain A
+
+    """
+    # TODO - Add record.annotations to the doctest, esp the residues (not working?)
+
+    # Only import PDB when needed, to avoid/delay NumPy dependency in SeqIO
+    from Bio.PDB import PDBParser
+
+    structure = PDBParser().get_structure(None, source)
+    pdb_id = structure.header["idcode"]
+    if not pdb_id:
+        warnings.warn(
+            "'HEADER' line not found; can't determine PDB ID.", BiopythonParserWarning
+        )
+        pdb_id = "????"
+
+    for record in AtomIterator(pdb_id, structure):
+        # The PDB header was loaded as a dictionary, so let's reuse it all
+        record.annotations.update(structure.header)
+
+        # ENH - add letter annotations -- per-residue info, e.g. numbers
+
+        yield record
+
+
+PDBX_POLY_SEQ_SCHEME_FIELDS = (
+    "_pdbx_poly_seq_scheme.asym_id",  # Chain ID
+    "_pdbx_poly_seq_scheme.mon_id",  # Residue type
+)
+
+STRUCT_REF_FIELDS = (
+    "_struct_ref.id",  # ID of this reference
+    "_struct_ref.db_name",  # Name of the database
+    "_struct_ref.db_code",  # Code for this entity
+    "_struct_ref.pdbx_db_accession",  # DB accession ID of ref
+)
+
+STRUCT_REF_SEQ_FIELDS = (
+    "_struct_ref_seq.ref_id",  # Pointer to _struct_ref
+    "_struct_ref_seq.pdbx_PDB_id_code",  # PDB ID of this structure
+    "_struct_ref_seq.pdbx_strand_id",  # Chain ID of the reference
+)
+
+
+def CifSeqresIterator(source):
+    """Return SeqRecord objects for each chain in an mmCIF file.
+
+    Argument source is a file-like object or a path to a file.
+
+    The sequences are derived from the _pdbx_poly_seq_scheme entries in the
+    mmCIF file, not the atoms of the 3D structure.
+
+    Specifically, these mmCIF records are handled: _pdbx_poly_seq_scheme and
+    _struct_ref_seq. The _pdbx_poly_seq_scheme records contain sequence
+    information, and the _struct_ref_seq records contain database
+    cross-references.
+
+    See:
+    http://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v40.dic/Categories/pdbx_poly_seq_scheme.html
+    and
+    http://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Categories/struct_ref_seq.html
+
+    This gets called internally via Bio.SeqIO for the sequence-based
+    interpretation of the mmCIF file format:
+
+    >>> from Bio import SeqIO
+    >>> for record in SeqIO.parse("PDB/1A8O.cif", "cif-seqres"):
+    ...     print("Record id %s, chain %s" % (record.id, record.annotations["chain"]))
+    ...     print(record.dbxrefs)
+    ...
+    Record id 1A8O:A, chain A
+    ['UNP:P12497', 'UNP:POL_HV1N5']
+
+    Equivalently,
+
+    >>> with open("PDB/1A8O.cif") as handle:
+    ...     for record in CifSeqresIterator(handle):
+    ...         print("Record id %s, chain %s" % (record.id, record.annotations["chain"]))
+    ...         print(record.dbxrefs)
+    ...
+    Record id 1A8O:A, chain A
+    ['UNP:P12497', 'UNP:POL_HV1N5']
+
+    Note the chain is recorded in the annotations dictionary, and any mmCIF
+    _struct_ref_seq entries are recorded in the database cross-references list.
+    """
+    # Only import PDB when needed, to avoid/delay NumPy dependency in SeqIO
+    from Bio.PDB.MMCIF2Dict import MMCIF2Dict
+
+    chains = collections.defaultdict(list)
+    metadata = collections.defaultdict(list)
+    records = MMCIF2Dict(source)
+
+    # Explicitly convert records to list (See #1533).
+    # If an item is not present, use an empty list
+    for field in (
+        PDBX_POLY_SEQ_SCHEME_FIELDS + STRUCT_REF_SEQ_FIELDS + STRUCT_REF_FIELDS
+    ):
+        if field not in records:
+            records[field] = []
+        elif not isinstance(records[field], list):
+            records[field] = [records[field]]
+
+    for asym_id, mon_id in zip(
+        records["_pdbx_poly_seq_scheme.asym_id"],
+        records["_pdbx_poly_seq_scheme.mon_id"],
+    ):
+        mon_id_1l = _res2aacode(mon_id)
+        chains[asym_id].append(mon_id_1l)
+
+    # Build a dict of _struct_ref records, indexed by the id field:
+    struct_refs = {}
+    for ref_id, db_name, db_code, db_acc in zip(
+        records["_struct_ref.id"],
+        records["_struct_ref.db_name"],
+        records["_struct_ref.db_code"],
+        records["_struct_ref.pdbx_db_accession"],
+    ):
+        struct_refs[ref_id] = {
+            "database": db_name,
+            "db_id_code": db_code,
+            "db_acc": db_acc,
+        }
+
+    # Look through _struct_ref_seq records, look up the corresponding
+    # _struct_ref and add an entry to the metadata list for this chain.
+    for ref_id, pdb_id, chain_id in zip(
+        records["_struct_ref_seq.ref_id"],
+        records["_struct_ref_seq.pdbx_PDB_id_code"],
+        records["_struct_ref_seq.pdbx_strand_id"],
+    ):
+        struct_ref = struct_refs[ref_id]
+
+        # The names here mirror those in PdbIO
+        metadata[chain_id].append({"pdb_id": pdb_id})
+        metadata[chain_id][-1].update(struct_ref)
+
+    for chn_id, residues in sorted(chains.items()):
+        record = SeqRecord(Seq("".join(residues)))
+        record.annotations = {"chain": chn_id}
+        # TODO: Test PDB files with DNA and RNA too:
+        record.annotations["molecule_type"] = "protein"
+        if chn_id in metadata:
+            m = metadata[chn_id][0]
+            record.id = record.name = "%s:%s" % (m["pdb_id"], chn_id)
+            record.description = "%s:%s %s" % (
+                m["database"],
+                m["db_acc"],
+                m["db_id_code"],
+            )
+            for melem in metadata[chn_id]:
+                record.dbxrefs.extend(
+                    [
+                        "%s:%s" % (melem["database"], melem["db_acc"]),
+                        "%s:%s" % (melem["database"], melem["db_id_code"]),
+                    ]
+                )
+        else:
+            record.id = chn_id
+        yield record
+
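+
+# Illustrative sketch (not part of the original module): MMCIF2Dict stores a
+# looped category as a list but a single-row category as a bare string, which
+# is why the loop above wraps scalars in lists (see #1533). Hypothetical data:
+def _demo_mmcif_normalisation():
+    records = {"_struct_ref.id": "1"}  # single-row value, not a list
+    for field in ("_struct_ref.id", "_struct_ref.db_name"):
+        if field not in records:
+            records[field] = []
+        elif not isinstance(records[field], list):
+            records[field] = [records[field]]
+    assert records == {"_struct_ref.id": ["1"], "_struct_ref.db_name": []}
+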
+
+def CifAtomIterator(source):
+    """Return SeqRecord objects for each chain in an mmCIF file.
+
+    Argument source is a file-like object or a path to a file.
+
+    The sequences are derived from the 3D structure (_atom_site.* fields)
+    in the mmCIF file.
+
+    Unrecognised three letter amino acid codes (e.g. "CSD") from HETATM entries
+    are converted to "X" in the sequence.
+
+    In addition to information from the PDB header (which is the same for all
+    records), the following chain specific information is placed in the
+    annotation:
+
+    record.annotations["residues"] = List of residue ID strings
+    record.annotations["chain"] = Chain ID (typically A, B ,...)
+    record.annotations["model"] = Model ID (typically zero)
+
+    Where amino acids are missing from the structure, as indicated by residue
+    numbering, the sequence is filled in with 'X' characters to match the size
+    of the missing region, and None is included as the corresponding entry in
+    the list record.annotations["residues"].
+
+    This function uses the Bio.PDB module to do most of the hard work. The
+    annotation information could be improved but this extra parsing should be
+    done in parse_pdb_header, not this module.
+
+    This gets called internally via Bio.SeqIO for the atom based interpretation
+    of the PDB file format:
+
+    >>> from Bio import SeqIO
+    >>> for record in SeqIO.parse("PDB/1A8O.cif", "cif-atom"):
+    ...     print("Record id %s, chain %s" % (record.id, record.annotations["chain"]))
+    ...
+    Record id 1A8O:A, chain A
+
+    Equivalently,
+
+    >>> with open("PDB/1A8O.cif") as handle:
+    ...     for record in CifAtomIterator(handle):
+    ...         print("Record id %s, chain %s" % (record.id, record.annotations["chain"]))
+    ...
+    Record id 1A8O:A, chain A
+
+    """
+    # TODO - Add record.annotations to the doctest, esp the residues (not working?)
+
+    # Only import parser when needed, to avoid/delay NumPy dependency in SeqIO
+    from Bio.PDB.MMCIFParser import MMCIFParser
+
+    structure = MMCIFParser().get_structure(None, source)
+    pdb_id = structure.header["idcode"]
+    if not pdb_id:
+        warnings.warn("Could not determine the PDB ID.", BiopythonParserWarning)
+        pdb_id = "????"
+    yield from AtomIterator(pdb_id, structure)
+
+
+if __name__ == "__main__":
+    from Bio._utils import run_doctest
+
+    run_doctest(verbose=0)
diff --git a/code/lib/Bio/SeqIO/PhdIO.py b/code/lib/Bio/SeqIO/PhdIO.py
new file mode 100644
index 0000000..4e97704
--- /dev/null
+++ b/code/lib/Bio/SeqIO/PhdIO.py
@@ -0,0 +1,158 @@
+# Copyright 2008-2016 by Peter Cock.  All rights reserved.
+# Revisions copyright 2009 by Cymon J. Cox.  All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Bio.SeqIO support for the "phd" file format.
+
+PHD files are output by PHRED and used by PHRAP and CONSED.
+
+You are expected to use this module via the Bio.SeqIO functions, under the
+format name "phd". See also the underlying Bio.Sequencing.Phd module.
+
+For example, using Bio.SeqIO we can read in one of the example PHRED files
+from the Biopython unit tests:
+
+    >>> from Bio import SeqIO
+    >>> for record in SeqIO.parse("Phd/phd1", "phd"):
+    ...     print(record.id)
+    ...     print("%s..." % record.seq[:10])
+    ...     print("%s..." % record.letter_annotations["phred_quality"][:10])
+    34_222_(80-A03-19).b.ab1
+    ctccgtcgga...
+    [9, 9, 10, 19, 22, 37, 28, 28, 24, 22]...
+    425_103_(81-A03-19).g.ab1
+    cgggatccca...
+    [14, 17, 22, 10, 10, 10, 15, 8, 8, 9]...
+    425_7_(71-A03-19).b.ab1
+    acataaatca...
+    [10, 10, 10, 10, 8, 8, 6, 6, 6, 6]...
+
+Since PHRED files contain quality scores, you can save them as FASTQ or as
+QUAL files, for example using Bio.SeqIO.write(...), or simply with the format
+method of the SeqRecord object:
+
+    >>> print(record[:50].format("fastq"))
+    @425_7_(71-A03-19).b.ab1
+    acataaatcaaattactnaccaacacacaaaccngtctcgcgtagtggag
+    +
+    ++++))'''')(''')$!$''')''''(+.''$!$))))+)))'''''''
+    
+
+Or,
+
+    >>> print(record[:50].format("qual"))
+    >425_7_(71-A03-19).b.ab1
+    10 10 10 10 8 8 6 6 6 6 8 7 6 6 6 8 3 0 3 6 6 6 8 6 6 6 6 7
+    10 13 6 6 3 0 3 8 8 8 8 10 8 8 8 6 6 6 6 6 6 6
+    
+
+Note these examples only show the first 50 bases to keep the output short.
+"""
+from Bio.SeqIO import QualityIO
+from Bio.SeqRecord import SeqRecord
+from Bio.Sequencing import Phd
+
+from .Interfaces import SequenceWriter
+
+
+def PhdIterator(source):
+    """Return SeqRecord objects from a PHD file.
+
+    Arguments:
+     - source - input stream opened in text mode, or a path to a file
+
+    This uses the Bio.Sequencing.Phd module to do the hard work.
+    """
+    phd_records = Phd.parse(source)
+    for phd_record in phd_records:
+        # Convert the PHY record into a SeqRecord...
+        # The "filename" can contain spaces, e.g. 'HWI-EAS94_4_1_1_602_99 1'
+        # from unit test example file phd_solexa.
+        # This will cause problems if used as the record identifier
+        # (e.g. output for FASTQ format).
+        name = phd_record.file_name.split(None, 1)[0]
+        seq_record = SeqRecord(
+            phd_record.seq, id=name, name=name, description=phd_record.file_name
+        )
+        # Just re-use the comments dictionary as the SeqRecord's annotations
+        seq_record.annotations = phd_record.comments
+        seq_record.annotations["molecule_type"] = "DNA"
+        # And store the qualities and peak locations as per-letter-annotation
+        seq_record.letter_annotations["phred_quality"] = [
+            int(site[1]) for site in phd_record.sites
+        ]
+        try:
+            seq_record.letter_annotations["peak_location"] = [
+                int(site[2]) for site in phd_record.sites
+            ]
+        except IndexError:
+            # peak locations are not always there according to
+            # David Gordon (the Consed author)
+            pass
+        yield seq_record
+    # All done
+
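+
+# Illustrative sketch (not part of the original module): each PHD site is a
+# (base, quality, peak) tuple of strings, but the peak column is optional,
+# hence the IndexError fallback above. The tuples below are made up.
+def _demo_phd_sites():
+    sites = [("a", "9", "6"), ("c", "10", "12")]
+    assert [int(site[1]) for site in sites] == [9, 10]
+    try:
+        peaks = [int(site[2]) for site in sites]
+    except IndexError:
+        peaks = None  # no peak locations in this file
+    assert peaks == [6, 12]
+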
+
+class PhdWriter(SequenceWriter):
+    """Class to write Phd format files."""
+
+    def __init__(self, handle):
+        """Initialize the class."""
+        super().__init__(handle)
+
+    def write_record(self, record):
+        """Write a single Phd record to the file."""
+        assert record.seq, "No sequence present in SeqRecord"
+        # This method returns the 'phred_quality' scores or converted
+        # 'solexa_quality' scores if present, else raises a value error
+        phred_qualities = QualityIO._get_phred_quality(record)
+        peak_locations = record.letter_annotations.get("peak_location")
+        if len(record.seq) != len(phred_qualities):
+            raise ValueError(
+                "Number of phd quality scores does not match length of sequence"
+            )
+        if peak_locations:
+            if len(record.seq) != len(peak_locations):
+                raise ValueError(
+                    "Number of peak location scores does not "
+                    "match length of sequence"
+                )
+        if None in phred_qualities:
+            raise ValueError("A quality value of None was found")
+        if record.description.startswith("%s " % record.id):
+            title = record.description
+        else:
+            title = "%s %s" % (record.id, record.description)
+        self.handle.write("BEGIN_SEQUENCE %s\nBEGIN_COMMENT\n" % self.clean(title))
+        for annot in [k.lower() for k in Phd.CKEYWORDS]:
+            value = None
+            if annot == "trim":
+                if record.annotations.get("trim"):
+                    value = "%s %s %.4f" % record.annotations["trim"]
+            elif annot == "trace_peak_area_ratio":
+                if record.annotations.get("trace_peak_area_ratio"):
+                    value = "%.4f" % record.annotations["trace_peak_area_ratio"]
+            else:
+                value = record.annotations.get(annot)
+            if value or value == 0:
+                self.handle.write("%s: %s\n" % (annot.upper(), value))
+
+        self.handle.write("END_COMMENT\nBEGIN_DNA\n")
+        for i, site in enumerate(record.seq):
+            if peak_locations:
+                self.handle.write(
+                    "%s %i %i\n" % (site, round(phred_qualities[i]), peak_locations[i])
+                )
+            else:
+                self.handle.write("%s %i\n" % (site, round(phred_qualities[i])))
+
+        self.handle.write("END_DNA\nEND_SEQUENCE\n")
+
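+
+# Illustrative usage sketch (an assumption, not part of the original module):
+# a SeqRecord with per-letter "phred_quality" scores should be enough, since
+# write_record above simply skips absent comment annotations.
+def _demo_phd_write():
+    from io import StringIO
+    from Bio.Seq import Seq
+    handle = StringIO()
+    record = SeqRecord(Seq("ACGT"), id="demo", description="demo read")
+    record.letter_annotations["phred_quality"] = [30, 20, 10, 5]
+    PhdWriter(handle).write_record(record)
+    assert handle.getvalue().startswith("BEGIN_SEQUENCE demo")
+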
+
+if __name__ == "__main__":
+    from Bio._utils import run_doctest
+
+    run_doctest()
diff --git a/code/lib/Bio/SeqIO/PirIO.py b/code/lib/Bio/SeqIO/PirIO.py
new file mode 100644
index 0000000..7f3ae07
--- /dev/null
+++ b/code/lib/Bio/SeqIO/PirIO.py
@@ -0,0 +1,292 @@
+# Copyright 2008-2015 by Peter Cock.  All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Bio.SeqIO support for the "pir" (aka PIR or NBRF) file format.
+
+This module is for reading and writing PIR or NBRF format files as
+SeqRecord objects.
+
+You are expected to use this module via the Bio.SeqIO functions, or if
+the file contains a sequence alignment, optionally via Bio.AlignIO instead.
+
+This format was introduced for the Protein Information Resource (PIR), a
+project of the National Biomedical Research Foundation (NBRF).  The PIR
+database itself is now part of UniProt.
+
+The file format is described online at:
+http://www.ebi.ac.uk/help/pir_frame.html
+http://www.cmbi.kun.nl/bioinf/tools/crab_pir.html (currently down)
+
+An example file in this format would be::
+
+  >P1;CRAB_ANAPL
+  ALPHA CRYSTALLIN B CHAIN (ALPHA(B)-CRYSTALLIN).
+    MDITIHNPLI RRPLFSWLAP SRIFDQIFGE HLQESELLPA SPSLSPFLMR
+    SPIFRMPSWL ETGLSEMRLE KDKFSVNLDV KHFSPEELKV KVLGDMVEIH
+    GKHEERQDEH GFIAREFNRK YRIPADVDPL TITSSLSLDG VLTVSAPRKQ
+    SDVPERSIPI TREEKPAIAG AQRK*
+
+  >P1;CRAB_BOVIN
+  ALPHA CRYSTALLIN B CHAIN (ALPHA(B)-CRYSTALLIN).
+    MDIAIHHPWI RRPFFPFHSP SRLFDQFFGE HLLESDLFPA STSLSPFYLR
+    PPSFLRAPSW IDTGLSEMRL EKDRFSVNLD VKHFSPEELK VKVLGDVIEV
+    HGKHEERQDE HGFISREFHR KYRIPADVDP LAITSSLSSD GVLTVNGPRK
+    QASGPERTIP ITREEKPAVT AAPKK*
+
+Or, an example of a multiple sequence alignment::
+
+  >P1;S27231
+  rhodopsin - northern leopard frog
+  MNGTEGPNFY IPMSNKTGVV RSPFDYPQYY LAEPWKYSVL AAYMFLLILL GLPINFMTLY
+  VTIQHKKLRT PLNYILLNLG VCNHFMVLCG FTITMYTSLH GYFVFGQTGC YFEGFFATLG
+  GEIALWSLVV LAIERYIVVC KPMSNFRFGE NHAMMGVAFT WIMALACAVP PLFGWSRYIP
+  EGMQCSCGVD YYTLKPEVNN ESFVIYMFVV HFLIPLIIIS FCYGRLVCTV KEAAAQQQES
+  ATTQKAEKEV TRMVIIMVIF FLICWVPYAY VAFYIFTHQG SEFGPIFMTV PAFFAKSSAI
+  YNPVIYIMLN KQFRNCMITT LCCGKNPFGD DDASSAATSK TEATSVSTSQ VSPA*
+
+  >P1;I51200
+  rhodopsin - African clawed frog
+  MNGTEGPNFY VPMSNKTGVV RSPFDYPQYY LAEPWQYSAL AAYMFLLILL GLPINFMTLF
+  VTIQHKKLRT PLNYILLNLV FANHFMVLCG FTVTMYTSMH GYFIFGPTGC YIEGFFATLG
+  GEVALWSLVV LAVERYIVVC KPMANFRFGE NHAIMGVAFT WIMALSCAAP PLFGWSRYIP
+  EGMQCSCGVD YYTLKPEVNN ESFVIYMFIV HFTIPLIVIF FCYGRLLCTV KEAAAQQQES
+  LTTQKAEKEV TRMVVIMVVF FLICWVPYAY VAFYIFTHQG SNFGPVFMTV PAFFAKSSAI
+  YNPVIYIVLN KQFRNCLITT LCCGKNPFGD EDGSSAATSK TEASSVSSSQ VSPA*
+
+  >P1;JN0120
+  rhodopsin - Japanese lamprey
+  MNGTEGDNFY VPFSNKTGLA RSPYEYPQYY LAEPWKYSAL AAYMFFLILV GFPVNFLTLF
+  VTVQHKKLRT PLNYILLNLA MANLFMVLFG FTVTMYTSMN GYFVFGPTMC SIEGFFATLG
+  GEVALWSLVV LAIERYIVIC KPMGNFRFGN THAIMGVAFT WIMALACAAP PLVGWSRYIP
+  EGMQCSCGPD YYTLNPNFNN ESYVVYMFVV HFLVPFVIIF FCYGRLLCTV KEAAAAQQES
+  ASTQKAEKEV TRMVVLMVIG FLVCWVPYAS VAFYIFTHQG SDFGATFMTL PAFFAKSSAL
+  YNPVIYILMN KQFRNCMITT LCCGKNPLGD DE-SGASTSKT EVSSVSTSPV SPA*
+
+
+As with the FASTA format, each record starts with a line beginning with ">"
+character.  There is then a two letter sequence type (e.g. P1, F1, DL, DC, RL,
+RC, or XX), a semicolon, and the identification code.  The second line is a
+free text description.  The remaining lines contain the sequence itself,
+terminating in an asterisk.  Space separated blocks of ten letters as shown
+above are typical.
+
+Sequence codes and their meanings:
+ - P1 - Protein (complete)
+ - F1 - Protein (fragment)
+ - D1 - DNA (e.g. EMBOSS seqret output)
+ - DL - DNA (linear)
+ - DC - DNA (circular)
+ - RL - RNA (linear)
+ - RC - RNA (circular)
+ - N3 - tRNA
+ - N1 - Other functional RNA
+ - XX - Unknown
+
+"""
+from Bio.Seq import Seq
+from Bio.SeqRecord import SeqRecord
+
+from .Interfaces import _get_seq_string
+from .Interfaces import SequenceIterator
+from .Interfaces import SequenceWriter
+
+
+_pir_mol_type = {
+    "P1": "protein",
+    "F1": "protein",
+    "D1": "DNA",
+    "DL": "DNA",
+    "DC": "DNA",
+    "RL": "RNA",
+    "RC": "RNA",
+    "N3": "RNA",
+    "XX": None,
+}
+
+
+class PirIterator(SequenceIterator):
+    """Parser for PIR files."""
+
+    def __init__(self, source):
+        """Iterate over a PIR file and yield SeqRecord objects.
+
+        source - file-like object or a path to a file.
+
+        Examples
+        --------
+        >>> with open("NBRF/DMB_prot.pir") as handle:
+        ...    for record in PirIterator(handle):
+        ...        print("%s length %i" % (record.id, len(record)))
+        HLA:HLA00489 length 263
+        HLA:HLA00490 length 94
+        HLA:HLA00491 length 94
+        HLA:HLA00492 length 80
+        HLA:HLA00493 length 175
+        HLA:HLA01083 length 188
+
+        """
+        super().__init__(source, mode="t", fmt="Pir")
+
+    def parse(self, handle):
+        """Start parsing the file, and return a SeqRecord generator."""
+        records = self.iterate(handle)
+        return records
+
+    def iterate(self, handle):
+        """Iterate over the records in the PIR file."""
+        # Skip any text before the first record (e.g. blank lines, comments)
+        for line in handle:
+            if line[0] == ">":
+                break
+        else:
+            return  # Premature end of file, or just empty?
+
+        while True:
+            pir_type = line[1:3]
+            if pir_type not in _pir_mol_type or line[3] != ";":
+                raise ValueError(
+                    "Records should start with '>XX;' where XX is a valid sequence type"
+                )
+            identifier = line[4:].strip()
+            description = handle.readline().strip()
+
+            lines = []
+            for line in handle:
+                if line[0] == ">":
+                    break
+                # Remove trailing whitespace, and any internal spaces
+                lines.append(line.rstrip().replace(" ", ""))
+            else:
+                line = None
+            seq = "".join(lines)
+            # Note: endswith also copes with an empty sequence (no IndexError)
+            if not seq.endswith("*"):
+                # Note the * terminator is present on nucleotide sequences too,
+                # it is not a stop codon!
+                raise ValueError(
+                    "Sequences in PIR files should include a * terminator!"
+                )
+
+            # Return the record and then continue...
+            record = SeqRecord(
+                Seq(seq[:-1]), id=identifier, name=identifier, description=description,
+            )
+            record.annotations["PIR-type"] = pir_type
+            if _pir_mol_type[pir_type]:
+                record.annotations["molecule_type"] = _pir_mol_type[pir_type]
+            yield record
+
+            if line is None:
+                return  # StopIteration
+        raise ValueError("Unrecognised PIR record format.")
+
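+
+# Illustrative usage sketch (not part of the original module): PirIterator
+# accepts any text handle, so a hypothetical in-memory record will do.
+def _demo_pir_parse():
+    from io import StringIO
+    handle = StringIO(">P1;DEMO_ID\nA made up protein.\nMDIT IHNP LI*\n")
+    records = list(PirIterator(handle))
+    assert len(records) == 1
+    assert records[0].id == "DEMO_ID"
+    assert str(records[0].seq) == "MDITIHNPLI"  # spaces and "*" stripped
+    assert records[0].annotations["PIR-type"] == "P1"
+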
+
+class PirWriter(SequenceWriter):
+    """Class to write PIR format files."""
+
+    def __init__(self, handle, wrap=60, record2title=None, code=None):
+        """Create a PIR writer.
+
+        Arguments:
+         - handle - Handle to an output file, e.g. as returned
+           by open(filename, "w")
+         - wrap - Optional line length used to wrap sequence lines.
+           Defaults to wrapping the sequence at 60 characters
+           Use zero (or None) for no wrapping, giving a single
+           long line for the sequence.
+         - record2title - Optional function to return the text to be
+           used for the title line of each record.  By default
+           a combination of the record.id, record.name and
+           record.description is used.
+         - code - Optional sequence code must be one of P1, F1,
+           D1, DL, DC, RL, RC, N3 and XX. By default None is used,
+           which means auto detection based on the molecule type
+           in the record annotation.
+
+        You can either use::
+
+            handle = open(filename, "w")
+            writer = PirWriter(handle)
+            writer.write_file(myRecords)
+            handle.close()
+
+        Or, follow the sequential file writer system, for example::
+
+            handle = open(filename, "w")
+            writer = PirWriter(handle)
+            writer.write_header() # does nothing for PIR files
+            ...
+            Multiple writer.write_record() and/or writer.write_records() calls
+            ...
+            writer.write_footer() # does nothing for PIR files
+            handle.close()
+
+        """
+        super().__init__(handle)
+        if wrap and wrap < 1:
+            raise ValueError("wrap should be None, 0, or a positive integer")
+        self.wrap = wrap
+        self.record2title = record2title
+        self.code = code
+
+    def write_record(self, record):
+        """Write a single PIR record to the file."""
+        if self.record2title:
+            title = self.clean(self.record2title(record))
+        else:
+            title = self.clean(record.id)
+
+        if record.name and record.description:
+            description = self.clean(record.name + " - " + record.description)
+        elif record.name and not record.description:
+            description = self.clean(record.name)
+        else:
+            description = self.clean(record.description)
+
+        if self.code:
+            code = self.code
+        else:
+            molecule_type = record.annotations.get("molecule_type")
+            if molecule_type is None:
+                code = "XX"
+            elif "DNA" in molecule_type:
+                code = "D1"
+            elif "RNA" in molecule_type:
+                code = "RL"
+            elif "protein" in molecule_type:
+                code = "P1"
+            else:
+                code = "XX"
+
+        if code not in _pir_mol_type:
+            raise TypeError(
+                "Sequence code must be one of %s." % ", ".join(_pir_mol_type)
+            )
+        assert "\n" not in title
+        assert "\r" not in description
+
+        self.handle.write(">%s;%s\n%s\n" % (code, title, description))
+
+        data = _get_seq_string(record)  # Catches sequence being None
+
+        assert "\n" not in data
+        assert "\r" not in data
+
+        if self.wrap:
+            line = ""
+            for i in range(0, len(data), self.wrap):
+                line += data[i : i + self.wrap] + "\n"
+            line = line[:-1] + "*\n"
+            self.handle.write(line)
+        else:
+            self.handle.write(data + "*\n")
+
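+
+# Illustrative usage sketch (not part of the original module): the
+# molecule_type annotation set below drives the automatic ">P1;" code
+# selection described in __init__. The record itself is made up.
+def _demo_pir_write():
+    from io import StringIO
+    handle = StringIO()
+    record = SeqRecord(Seq("MDITIHNPLI"), id="DEMO_ID", name="", description="demo")
+    record.annotations["molecule_type"] = "protein"
+    PirWriter(handle).write_record(record)
+    assert handle.getvalue().startswith(">P1;DEMO_ID\n")
+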
+
+if __name__ == "__main__":
+    from Bio._utils import run_doctest
+
+    run_doctest(verbose=0)
diff --git a/code/lib/Bio/SeqIO/QualityIO.py b/code/lib/Bio/SeqIO/QualityIO.py
new file mode 100644
index 0000000..e1e8d4c
--- /dev/null
+++ b/code/lib/Bio/SeqIO/QualityIO.py
@@ -0,0 +1,2297 @@
+# Copyright 2009-2020 by Peter Cock.  All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Bio.SeqIO support for the FASTQ and QUAL file formats.
+
+Note that you are expected to use this code via the Bio.SeqIO interface, as
+shown below.
+
+The FASTQ file format is used frequently at the Wellcome Trust Sanger Institute
+to bundle a FASTA sequence and its PHRED quality data (integers between 0 and
+90).  Rather than using a single FASTQ file, often paired FASTA and QUAL files
+are used containing the sequence and the quality information separately.
+
+The PHRED software reads DNA sequencing trace files, calls bases, and
+assigns a non-negative quality value to each called base using a logged
+transformation of the error probability, Q = -10 log10( Pe ), for example::
+
+    Pe = 1.0,         Q =  0
+    Pe = 0.1,         Q = 10
+    Pe = 0.01,        Q = 20
+    ...
+    Pe = 0.00000001,  Q = 80
+    Pe = 0.000000001, Q = 90
+
+In typical raw sequence reads, the PHRED quality values will be from 0 to 40.
+In the QUAL format these quality values are held as space separated text in
+a FASTA like file format.  In the FASTQ format, each quality value is encoded
+with a single ASCII character using chr(Q+33), meaning zero maps to the
+character "!" and for example 80 maps to "q".  For the Sanger FASTQ standard
+the allowed range of PHRED scores is 0 to 93 inclusive. The sequences and
+quality are then stored in pairs in a FASTA like format.
+
+Unfortunately there is no official document describing the FASTQ file format,
+and worse, several related but different variants exist. For more details,
+please read this open access publication::
+
+    The Sanger FASTQ file format for sequences with quality scores, and the
+    Solexa/Illumina FASTQ variants.
+    P.J.A.Cock (Biopython), C.J.Fields (BioPerl), N.Goto (BioRuby),
+    M.L.Heuer (BioJava) and P.M. Rice (EMBOSS).
+    Nucleic Acids Research 2010 38(6):1767-1771
+    https://doi.org/10.1093/nar/gkp1137
+
+The good news is that Roche 454 sequencers can output files in the QUAL format,
+and sensibly they use PHRED style scores like Sanger.  Converting a pair of
+FASTA and QUAL files into a Sanger style FASTQ file is easy. To extract QUAL
+files from a Roche 454 SFF binary file, use the Roche off-instrument command
+line tool "sffinfo" with the -q or -qual argument.  You can extract a matching
+FASTA file using the -s or -seq argument instead.
+
+The bad news is that Solexa/Illumina did things differently - they have their
+own scoring system AND their own incompatible versions of the FASTQ format.
+Solexa/Illumina quality scores use Q = - 10 log10 ( Pe / (1-Pe) ), which can
+be negative.  PHRED scores and Solexa scores are NOT interchangeable (but a
+reasonable mapping can be achieved between them, and they are approximately
+equal for higher quality reads).
+
+Confusingly early Solexa pipelines produced a FASTQ like file but using their
+own score mapping and an ASCII offset of 64. To make things worse, for the
+Solexa/Illumina pipeline 1.3 onwards, they introduced a third variant of the
+FASTQ file format, this time using PHRED scores (which is more consistent) but
+with an ASCII offset of 64.
+
+i.e. There are at least THREE different and INCOMPATIBLE variants of the FASTQ
+file format: The original Sanger PHRED standard, and two from Solexa/Illumina.
+
+The good news is that as of CASAVA version 1.8, Illumina sequencers will
+produce FASTQ files using the standard Sanger encoding.
+
+You are expected to use this module via the Bio.SeqIO functions, with the
+following format names:
+
+    - "qual" means simple quality files using PHRED scores (e.g. from Roche 454)
+    - "fastq" means Sanger style FASTQ files using PHRED scores and an ASCII
+      offset of 33 (e.g. from the NCBI Short Read Archive and Illumina 1.8+).
+      These can potentially hold PHRED scores from 0 to 93.
+    - "fastq-sanger" is an alias for "fastq".
+    - "fastq-solexa" means old Solexa (and also very early Illumina) style FASTQ
+      files, using Solexa scores with an ASCII offset 64. These can hold Solexa
+      scores from -5 to 62.
+    - "fastq-illumina" means newer Illumina 1.3 to 1.7 style FASTQ files, using
+      PHRED scores but with an ASCII offset 64, allowing PHRED scores from 0
+      to 62.
+
+We could potentially add support for "qual-solexa" meaning QUAL files which
+contain Solexa scores, but thus far there isn't any reason to use such files.
+
+For example, consider the following short FASTQ file::
+
+    @EAS54_6_R1_2_1_413_324
+    CCCTTCTTGTCTTCAGCGTTTCTCC
+    +
+    ;;3;;;;;;;;;;;;7;;;;;;;88
+    @EAS54_6_R1_2_1_540_792
+    TTGGCAGGCCAAGGCCGATGGATCA
+    +
+    ;;;;;;;;;;;7;;;;;-;;;3;83
+    @EAS54_6_R1_2_1_443_348
+    GTTGCTTCTGGCGTGGGTGGGGGGG
+    +
+    ;;;;;;;;;;;9;7;;.7;393333
+
+This contains three reads of length 25.  From the read length these were
+probably originally from an early Solexa/Illumina sequencer but this file
+follows the Sanger FASTQ convention (PHRED style qualities with an ASCII
+offset of 33).  This means we can parse this file using Bio.SeqIO using
+"fastq" as the format name:
+
+>>> from Bio import SeqIO
+>>> for record in SeqIO.parse("Quality/example.fastq", "fastq"):
+...     print("%s %s" % (record.id, record.seq))
+EAS54_6_R1_2_1_413_324 CCCTTCTTGTCTTCAGCGTTTCTCC
+EAS54_6_R1_2_1_540_792 TTGGCAGGCCAAGGCCGATGGATCA
+EAS54_6_R1_2_1_443_348 GTTGCTTCTGGCGTGGGTGGGGGGG
+
+The qualities are held as a list of integers in each record's annotation:
+
+>>> print(record)
+ID: EAS54_6_R1_2_1_443_348
+Name: EAS54_6_R1_2_1_443_348
+Description: EAS54_6_R1_2_1_443_348
+Number of features: 0
+Per letter annotation for: phred_quality
+Seq('GTTGCTTCTGGCGTGGGTGGGGGGG')
+>>> print(record.letter_annotations["phred_quality"])
+[26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 24, 26, 22, 26, 26, 13, 22, 26, 18, 24, 18, 18, 18, 18]
+
+You can use the SeqRecord format method to show this in the QUAL format:
+
+>>> print(record.format("qual"))
+>EAS54_6_R1_2_1_443_348
+26 26 26 26 26 26 26 26 26 26 26 24 26 22 26 26 13 22 26 18
+24 18 18 18 18
+
+
+Or go back to the FASTQ format, use "fastq" (or "fastq-sanger"):
+
+>>> print(record.format("fastq"))
+@EAS54_6_R1_2_1_443_348
+GTTGCTTCTGGCGTGGGTGGGGGGG
++
+;;;;;;;;;;;9;7;;.7;393333
+
+
+Or, using the Illumina 1.3+ FASTQ encoding (PHRED values with an ASCII offset
+of 64):
+
+>>> print(record.format("fastq-illumina"))
+@EAS54_6_R1_2_1_443_348
+GTTGCTTCTGGCGTGGGTGGGGGGG
++
+ZZZZZZZZZZZXZVZZMVZRXRRRR
+
+
+You can also get Biopython to convert the scores and show a Solexa style
+FASTQ file:
+
+>>> print(record.format("fastq-solexa"))
+@EAS54_6_R1_2_1_443_348
+GTTGCTTCTGGCGTGGGTGGGGGGG
++
+ZZZZZZZZZZZXZVZZMVZRXRRRR
+
+
+Notice that this is actually the same output as above using "fastq-illumina"
+as the format! The reason for this is all these scores are high enough that
+the PHRED and Solexa scores are almost equal. The differences become apparent
+for poor quality reads. See the functions solexa_quality_from_phred and
+phred_quality_from_solexa for more details.
+
+If you wanted to trim your sequences (perhaps to remove low quality regions,
+or to remove a primer sequence), try slicing the SeqRecord objects.  e.g.
+
+>>> sub_rec = record[5:15]
+>>> print(sub_rec)
+ID: EAS54_6_R1_2_1_443_348
+Name: EAS54_6_R1_2_1_443_348
+Description: EAS54_6_R1_2_1_443_348
+Number of features: 0
+Per letter annotation for: phred_quality
+Seq('TTCTGGCGTG')
+>>> print(sub_rec.letter_annotations["phred_quality"])
+[26, 26, 26, 26, 26, 26, 24, 26, 22, 26]
+>>> print(sub_rec.format("fastq"))
+@EAS54_6_R1_2_1_443_348
+TTCTGGCGTG
++
+;;;;;;9;7;
+
+
+If you wanted to, you could read in this FASTQ file, and save it as a QUAL file:
+
+>>> from Bio import SeqIO
+>>> record_iterator = SeqIO.parse("Quality/example.fastq", "fastq")
+>>> with open("Quality/temp.qual", "w") as out_handle:
+...     SeqIO.write(record_iterator, out_handle, "qual")
+3
+
+You can of course read in a QUAL file, such as the one we just created:
+
+>>> from Bio import SeqIO
+>>> for record in SeqIO.parse("Quality/temp.qual", "qual"):
+...     print("%s read of length %d" % (record.id, len(record.seq)))
+EAS54_6_R1_2_1_413_324 read of length 25
+EAS54_6_R1_2_1_540_792 read of length 25
+EAS54_6_R1_2_1_443_348 read of length 25
+
+Notice that QUAL files don't have a proper sequence present!  But the quality
+information is there:
+
+>>> print(record)
+ID: EAS54_6_R1_2_1_443_348
+Name: EAS54_6_R1_2_1_443_348
+Description: EAS54_6_R1_2_1_443_348
+Number of features: 0
+Per letter annotation for: phred_quality
+Undefined sequence of length 25
+>>> print(record.letter_annotations["phred_quality"])
+[26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 24, 26, 22, 26, 26, 13, 22, 26, 18, 24, 18, 18, 18, 18]
+
+Just to keep things tidy, if you are following this example yourself, you can
+delete this temporary file now:
+
+>>> import os
+>>> os.remove("Quality/temp.qual")
+
+Sometimes you won't have a FASTQ file, but rather just a pair of FASTA and QUAL
+files.  Because the Bio.SeqIO system is designed for reading single files, you
+would have to read the two in separately and then combine the data.  However,
+since this is such a common thing to want to do, there is a helper iterator
+defined in this module that does this for you - PairedFastaQualIterator.
+
+Alternatively, if you have enough RAM to hold all the records in memory at once,
+then a simple dictionary approach would work:
+
+>>> from Bio import SeqIO
+>>> reads = SeqIO.to_dict(SeqIO.parse("Quality/example.fasta", "fasta"))
+>>> for rec in SeqIO.parse("Quality/example.qual", "qual"):
+...     reads[rec.id].letter_annotations["phred_quality"]=rec.letter_annotations["phred_quality"]
+
+You can then access any record by its key, and get both the sequence and the
+quality scores.
+
+>>> print(reads["EAS54_6_R1_2_1_540_792"].format("fastq"))
+@EAS54_6_R1_2_1_540_792
+TTGGCAGGCCAAGGCCGATGGATCA
++
+;;;;;;;;;;;7;;;;;-;;;3;83
+
+
+It is important that you explicitly tell Bio.SeqIO which FASTQ variant you are
+using ("fastq" or "fastq-sanger" for the Sanger standard using PHRED values,
+"fastq-solexa" for the original Solexa/Illumina variant, or "fastq-illumina"
+for the more recent variant), as this cannot be detected reliably
+automatically.
+
+To illustrate this problem, let's consider an artificial example:
+
+>>> from Bio.Seq import Seq
+>>> from Bio.SeqRecord import SeqRecord
+>>> test = SeqRecord(Seq("NACGTACGTA"), id="Test", description="Made up!")
+>>> print(test.format("fasta"))
+>Test Made up!
+NACGTACGTA
+
+>>> print(test.format("fastq"))
+Traceback (most recent call last):
+ ...
+ValueError: No suitable quality scores found in letter_annotations of SeqRecord (id=Test).
+
+We created a sample SeqRecord, and can show it in FASTA format - but for QUAL
+or FASTQ format we need to provide some quality scores. These are held as a
+list of integers (one for each base) in the letter_annotations dictionary:
+
+>>> test.letter_annotations["phred_quality"] = [0, 1, 2, 3, 4, 5, 10, 20, 30, 40]
+>>> print(test.format("qual"))
+>Test Made up!
+0 1 2 3 4 5 10 20 30 40
+
+>>> print(test.format("fastq"))
+@Test Made up!
+NACGTACGTA
++
+!"#$%&+5?I
+
+
+We can check this FASTQ encoding - the first PHRED quality was zero, and this
+mapped to an exclamation mark, while the final score was 40 and this mapped to
+the letter "I":
+
+>>> ord('!') - 33
+0
+>>> ord('I') - 33
+40
+>>> [ord(letter)-33 for letter in '!"#$%&+5?I']
+[0, 1, 2, 3, 4, 5, 10, 20, 30, 40]
+
+Similarly, we could produce an Illumina 1.3 to 1.7 style FASTQ file using PHRED
+scores with an offset of 64:
+
+>>> print(test.format("fastq-illumina"))
+@Test Made up!
+NACGTACGTA
++
+@ABCDEJT^h
+
+
+And we can check this too - the first PHRED score was zero, and this mapped to
+"@", while the final score was 40 and this mapped to "h":
+
+>>> ord("@") - 64
+0
+>>> ord("h") - 64
+40
+>>> [ord(letter)-64 for letter in "@ABCDEJT^h"]
+[0, 1, 2, 3, 4, 5, 10, 20, 30, 40]
+
+Notice how different the standard Sanger FASTQ and the Illumina 1.3 to 1.7 style
+FASTQ files look for the same data! Then we have the older Solexa/Illumina
+format to consider which encodes Solexa scores instead of PHRED scores.
+
+First let's see what Biopython says if we convert the PHRED scores into Solexa
+scores (rounding to one decimal place):
+
+>>> for q in [0, 1, 2, 3, 4, 5, 10, 20, 30, 40]:
+...     print("PHRED %i maps to Solexa %0.1f" % (q, solexa_quality_from_phred(q)))
+PHRED 0 maps to Solexa -5.0
+PHRED 1 maps to Solexa -5.0
+PHRED 2 maps to Solexa -2.3
+PHRED 3 maps to Solexa -0.0
+PHRED 4 maps to Solexa 1.8
+PHRED 5 maps to Solexa 3.3
+PHRED 10 maps to Solexa 9.5
+PHRED 20 maps to Solexa 20.0
+PHRED 30 maps to Solexa 30.0
+PHRED 40 maps to Solexa 40.0
+
+Now here is the record using the old Solexa style FASTQ file:
+
+>>> print(test.format("fastq-solexa"))
+@Test Made up!
+NACGTACGTA
++
+;;>@BCJT^h
+
+
+Again, this is using an ASCII offset of 64, so we can check the Solexa scores:
+
+>>> [ord(letter)-64 for letter in ";;>@BCJT^h"]
+[-5, -5, -2, 0, 2, 3, 10, 20, 30, 40]
+
+This explains why the last few letters of this FASTQ output matched that using
+the Illumina 1.3 to 1.7 format - high quality PHRED scores and Solexa scores
+are approximately equal.
+
+"""
+import warnings
+
+from math import log
+
+from Bio import BiopythonParserWarning
+from Bio import BiopythonWarning
+from Bio import StreamModeError
+from Bio.File import as_handle
+from Bio.Seq import Seq
+from Bio.SeqRecord import SeqRecord
+
+from .Interfaces import _clean
+from .Interfaces import _get_seq_string
+from .Interfaces import SequenceIterator
+from .Interfaces import SequenceWriter
+
+
+# define score offsets. See discussion for differences between Sanger and
+# Solexa offsets.
+SANGER_SCORE_OFFSET = 33
+SOLEXA_SCORE_OFFSET = 64
+
+
+def solexa_quality_from_phred(phred_quality):
+    """Covert a PHRED quality (range 0 to about 90) to a Solexa quality.
+
+    PHRED and Solexa quality scores are both log transformations of a
+    probability of error (high score = low probability of error). This function
+    takes a PHRED score, transforms it back to a probability of error, and
+    then re-expresses it as a Solexa score. This assumes the error estimates
+    are equivalent.
+
+    How does this work exactly? Well the PHRED quality is minus ten times the
+    base ten logarithm of the probability of error::
+
+        phred_quality = -10*log(error,10)
+
+    Therefore, turning this round::
+
+        error = 10 ** (- phred_quality / 10)
+
+    Now, Solexa qualities use a different log transformation::
+
+        solexa_quality = -10*log(error/(1-error),10)
+
+    After substitution and a little manipulation we get::
+
+         solexa_quality = 10*log(10**(phred_quality/10.0) - 1, 10)
+
+    However, real Solexa files use a minimum quality of -5. This does have a
+    good reason - a random base call would be correct 25% of the time,
+    and thus have a probability of error of 0.75, which gives 1.25 as the PHRED
+    quality, or -4.77 as the Solexa quality. Thus (after rounding), a random
+    nucleotide read would have a PHRED quality of 1, or a Solexa quality of -5.
+
+    Taken literally, this logarithmic formula would map a PHRED quality of zero
+    to a Solexa quality of minus infinity. Of course, taken literally, a PHRED
+    score of zero means a probability of error of one (i.e. the base call is
+    definitely wrong), which is worse than random! In practice, a PHRED quality
+    of zero usually means a default value, or perhaps random - and therefore
+    mapping it to the minimum Solexa score of -5 is reasonable.
+
+    In conclusion, we follow EMBOSS, and take this logarithmic formula but also
+    apply a minimum value of -5.0 for the Solexa quality, and also map a PHRED
+    quality of zero to -5.0 as well.
+
+    Note this function will return a floating point number, it is up to you to
+    round this to the nearest integer if appropriate.  e.g.
+
+    >>> print("%0.2f" % round(solexa_quality_from_phred(80), 2))
+    80.00
+    >>> print("%0.2f" % round(solexa_quality_from_phred(50), 2))
+    50.00
+    >>> print("%0.2f" % round(solexa_quality_from_phred(20), 2))
+    19.96
+    >>> print("%0.2f" % round(solexa_quality_from_phred(10), 2))
+    9.54
+    >>> print("%0.2f" % round(solexa_quality_from_phred(5), 2))
+    3.35
+    >>> print("%0.2f" % round(solexa_quality_from_phred(4), 2))
+    1.80
+    >>> print("%0.2f" % round(solexa_quality_from_phred(3), 2))
+    -0.02
+    >>> print("%0.2f" % round(solexa_quality_from_phred(2), 2))
+    -2.33
+    >>> print("%0.2f" % round(solexa_quality_from_phred(1), 2))
+    -5.00
+    >>> print("%0.2f" % round(solexa_quality_from_phred(0), 2))
+    -5.00
+
+    Notice that for high quality reads PHRED and Solexa scores are numerically
+    equal. The differences are important for poor quality reads, where PHRED
+    has a minimum of zero but Solexa scores can be negative.
+
+    Finally, as a special case where None is used for a "missing value", None
+    is returned:
+
+    >>> print(solexa_quality_from_phred(None))
+    None
+    """
+    if phred_quality is None:
+        # Assume None is used as some kind of NULL or NA value; return None
+        # e.g. Bio.SeqIO gives Ace contig gaps a quality of None.
+        return None
+    elif phred_quality > 0:
+        # Solexa uses a minimum value of -5, which after rounding matches a
+        # random nucleotide base call.
+        return max(-5.0, 10 * log(10 ** (phred_quality / 10.0) - 1, 10))
+    elif phred_quality == 0:
+        # Special case, map to -5 as discussed in the docstring
+        return -5.0
+    else:
+        raise ValueError(
+            "PHRED qualities must be positive (or zero), not %r" % phred_quality
+        )
+
+
+def phred_quality_from_solexa(solexa_quality):
+    """Convert a Solexa quality (which can be negative) to a PHRED quality.
+
+    PHRED and Solexa quality scores are both log transformations of a
+    probability of error (high score = low probability of error). This function
+    takes a Solexa score, transforms it back to a probability of error, and
+    then re-expresses it as a PHRED score. This assumes the error estimates
+    are equivalent.
+
+    The underlying formulas are given in the documentation for the sister
+    function solexa_quality_from_phred, in this case the operation is::
+
+        phred_quality = 10*log(10**(solexa_quality/10.0) + 1, 10)
+
+    This will return a floating point number, it is up to you to round this to
+    the nearest integer if appropriate.  e.g.
+
+    >>> print("%0.2f" % round(phred_quality_from_solexa(80), 2))
+    80.00
+    >>> print("%0.2f" % round(phred_quality_from_solexa(20), 2))
+    20.04
+    >>> print("%0.2f" % round(phred_quality_from_solexa(10), 2))
+    10.41
+    >>> print("%0.2f" % round(phred_quality_from_solexa(0), 2))
+    3.01
+    >>> print("%0.2f" % round(phred_quality_from_solexa(-5), 2))
+    1.19
+
+    Note that a solexa_quality less than -5 is not expected and will trigger
+    a warning, but it will still be converted as per the logarithmic mapping
+    (giving a number between 0 and 1.19 back).
+
+    As a special case where None is used for a "missing value", None is
+    returned:
+
+    >>> print(phred_quality_from_solexa(None))
+    None
+    """
+    if solexa_quality is None:
+        # Assume None is used as some kind of NULL or NA value; return None
+        return None
+    if solexa_quality < -5:
+        warnings.warn(
+            "Solexa quality less than -5 passed, %r" % solexa_quality, BiopythonWarning
+        )
+    return 10 * log(10 ** (solexa_quality / 10.0) + 1, 10)
+
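+
+# Illustrative check (not part of the original module): the two conversions
+# above are inverses of each other, e.g. a PHRED quality of 20 round-trips:
+def _demo_quality_round_trip():
+    solexa = solexa_quality_from_phred(20)  # 10*log10(10**2 - 1), about 19.96
+    assert round(solexa, 2) == 19.96
+    assert round(phred_quality_from_solexa(solexa), 2) == 20.0
+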
+
+def _get_phred_quality(record):
+    """Extract PHRED qualities from a SeqRecord's letter_annotations (PRIVATE).
+
+    If there are no PHRED qualities, but there are Solexa qualities, those are
+    used instead after conversion.
+    """
+    try:
+        return record.letter_annotations["phred_quality"]
+    except KeyError:
+        pass
+    try:
+        return [
+            phred_quality_from_solexa(q)
+            for q in record.letter_annotations["solexa_quality"]
+        ]
+    except KeyError:
+        raise ValueError(
+            "No suitable quality scores found in "
+            "letter_annotations of SeqRecord (id=%s)." % record.id
+        ) from None
+
+
+# Only map 0 to 93, we need to give a warning on truncating at 93
+_phred_to_sanger_quality_str = {
+    qp: chr(min(126, qp + SANGER_SCORE_OFFSET)) for qp in range(0, 93 + 1)
+}
+# Only map -5 to 93, we need to give a warning on truncating at 93
+_solexa_to_sanger_quality_str = {
+    qs: chr(min(126, int(round(phred_quality_from_solexa(qs)) + SANGER_SCORE_OFFSET)))
+    for qs in range(-5, 93 + 1)
+}
+
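+
+# Illustrative check (not part of the original module): the cached table
+# implements chr(Q + 33), so PHRED 0 is "!" and the PHRED 93 cap is "~"
+# (ASCII 126, the tilde):
+def _demo_sanger_table():
+    assert _phred_to_sanger_quality_str[0] == "!"
+    assert _phred_to_sanger_quality_str[40] == "I"
+    assert _phred_to_sanger_quality_str[93] == "~"
+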
+
+def _get_sanger_quality_str(record):
+    """Return a Sanger FASTQ encoded quality string (PRIVATE).
+
+    >>> from Bio.Seq import Seq
+    >>> from Bio.SeqRecord import SeqRecord
+    >>> r = SeqRecord(Seq("ACGTAN"), id="Test",
+    ...               letter_annotations = {"phred_quality":[50, 40, 30, 20, 10, 0]})
+    >>> _get_sanger_quality_str(r)
+    'SI?5+!'
+
+    If, as in the above example (or indeed when parsing with Bio.SeqIO), the
+    PHRED qualities are integers, this function is able to use a very fast
+    pre-cached mapping. However, if they are floats which differ slightly, then
+    it has to do the appropriate rounding - which is slower:
+
+    >>> r2 = SeqRecord(Seq("ACGTAN"), id="Test2",
+    ...      letter_annotations = {"phred_quality":[50.0, 40.05, 29.99, 20, 9.55, 0.01]})
+    >>> _get_sanger_quality_str(r2)
+    'SI?5+!'
+
+    If your scores include a None value, this raises an exception:
+
+    >>> r3 = SeqRecord(Seq("ACGTAN"), id="Test3",
+    ...               letter_annotations = {"phred_quality":[50, 40, 30, 20, 10, None]})
+    >>> _get_sanger_quality_str(r3)
+    Traceback (most recent call last):
+       ...
+    TypeError: A quality value of None was found
+
+    If (strangely) your record has both PHRED and Solexa scores, then the PHRED
+    scores are used in preference:
+
+    >>> r4 = SeqRecord(Seq("ACGTAN"), id="Test4",
+    ...               letter_annotations = {"phred_quality":[50, 40, 30, 20, 10, 0],
+    ...                                     "solexa_quality":[-5, -4, 0, None, 0, 40]})
+    >>> _get_sanger_quality_str(r4)
+    'SI?5+!'
+
+    If there are no PHRED scores, but there are Solexa scores, these are used
+    instead (after the appropriate conversion):
+
+    >>> r5 = SeqRecord(Seq("ACGTAN"), id="Test5",
+    ...      letter_annotations = {"solexa_quality":[40, 30, 20, 10, 0, -5]})
+    >>> _get_sanger_quality_str(r5)
+    'I?5+$"'
+
+    Again, integer Solexa scores can be looked up in a pre-cached mapping making
+    this very fast. You can still use approximate floating point scores:
+
+    >>> r6 = SeqRecord(Seq("ACGTAN"), id="Test6",
+    ...      letter_annotations = {"solexa_quality":[40.1, 29.7, 20.01, 10, 0.0, -4.9]})
+    >>> _get_sanger_quality_str(r6)
+    'I?5+$"'
+
+    Notice that due to the limited range of printable ASCII characters, a
+    PHRED quality of 93 is the maximum that can be held in a Sanger FASTQ
+    file (using ASCII 126, the tilde). This function will issue a warning
+    in this situation.
+    """
+    # TODO - This function works and is fast, but it is also ugly
+    # and there is considerable repetition of code for the other
+    # two FASTQ variants.
+    try:
+        # These take priority (in case both Solexa and PHRED scores found)
+        qualities = record.letter_annotations["phred_quality"]
+    except KeyError:
+        # Fall back on solexa scores...
+        pass
+    else:
+        # Try and use the precomputed mapping:
+        try:
+            return "".join(_phred_to_sanger_quality_str[qp] for qp in qualities)
+        except KeyError:
+            # Could be a float, or a None in the list, or a high value.
+            pass
+        if None in qualities:
+            raise TypeError("A quality value of None was found")
+        if max(qualities) >= 93.5:
+            warnings.warn(
+                "Data loss - max PHRED quality 93 in Sanger FASTQ", BiopythonWarning
+            )
+        # This will apply the truncation at 93, giving max ASCII 126
+        return "".join(
+            chr(min(126, int(round(qp)) + SANGER_SCORE_OFFSET)) for qp in qualities
+        )
+    # Fall back on the Solexa scores...
+    try:
+        qualities = record.letter_annotations["solexa_quality"]
+    except KeyError:
+        raise ValueError(
+            "No suitable quality scores found in "
+            "letter_annotations of SeqRecord (id=%s)." % record.id
+        ) from None
+    # Try and use the precomputed mapping:
+    try:
+        return "".join(_solexa_to_sanger_quality_str[qs] for qs in qualities)
+    except KeyError:
+        # Could be a float, a None in the list, or a value outside the cached range.
+        pass
+    if None in qualities:
+        raise TypeError("A quality value of None was found")
+    # Must do this the slow way, first converting the Solexa scores into
+    # PHRED scores:
+    if max(qualities) >= 93.5:
+        warnings.warn(
+            "Data loss - max PHRED quality 93 in Sanger FASTQ", BiopythonWarning
+        )
+    # This will apply the truncation at 93, giving max ASCII 126
+    return "".join(
+        chr(min(126, int(round(phred_quality_from_solexa(qs))) + SANGER_SCORE_OFFSET))
+        for qs in qualities
+    )
+
+
+# Only map 0 to 62, we need to give a warning on truncating at 62
+assert 62 + SOLEXA_SCORE_OFFSET == 126
+_phred_to_illumina_quality_str = {
+    qp: chr(qp + SOLEXA_SCORE_OFFSET) for qp in range(0, 62 + 1)
+}
+# Only map -5 to 62, we need to give a warning on truncating at 62
+_solexa_to_illumina_quality_str = {
+    qs: chr(int(round(phred_quality_from_solexa(qs))) + SOLEXA_SCORE_OFFSET)
+    for qs in range(-5, 62 + 1)
+}
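+# For example, with SOLEXA_SCORE_OFFSET of 64 these caches map PHRED 0 to "@"
+# (ASCII 64) and PHRED 62 to "~" (ASCII 126); Solexa -5 converts to roughly
+# PHRED 1.19, which rounds to 1 and gives "A" (ASCII 65).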
+
+
+def _get_illumina_quality_str(record):
+    """Return an Illumina 1.3 to 1.7 FASTQ encoded quality string (PRIVATE).
+
+    Notice that due to the limited range of printable ASCII characters, a
+    PHRED quality of 62 is the maximum that can be held in an Illumina FASTQ
+    file (using ASCII 126, the tilde). This function will issue a warning
+    in this situation.
+    """
+    # TODO - This function works and is fast, but it is also ugly
+    # and there is considerable repetition of code for the other
+    # two FASTQ variants.
+    try:
+        # These take priority (in case both Solexa and PHRED scores found)
+        qualities = record.letter_annotations["phred_quality"]
+    except KeyError:
+        # Fall back on solexa scores...
+        pass
+    else:
+        # Try and use the precomputed mapping:
+        try:
+            return "".join(_phred_to_illumina_quality_str[qp] for qp in qualities)
+        except KeyError:
+            # Could be a float, or a None in the list, or a high value.
+            pass
+        if None in qualities:
+            raise TypeError("A quality value of None was found")
+        if max(qualities) >= 62.5:
+            warnings.warn(
+                "Data loss - max PHRED quality 62 in Illumina FASTQ", BiopythonWarning
+            )
+        # This will apply the truncation at 62, giving max ASCII 126
+        return "".join(
+            chr(min(126, int(round(qp)) + SOLEXA_SCORE_OFFSET)) for qp in qualities
+        )
+    # Fall back on the Solexa scores...
+    try:
+        qualities = record.letter_annotations["solexa_quality"]
+    except KeyError:
+        raise ValueError(
+            "No suitable quality scores found in "
+            "letter_annotations of SeqRecord (id=%s)." % record.id
+        ) from None
+    # Try and use the precomputed mapping:
+    try:
+        return "".join(_solexa_to_illumina_quality_str[qs] for qs in qualities)
+    except KeyError:
+        # Could be a float, a None in the list, or a value outside the cached range.
+        pass
+    if None in qualities:
+        raise TypeError("A quality value of None was found")
+    # Must do this the slow way, first converting the Solexa scores into
+    # PHRED scores:
+    if max(qualities) >= 62.5:
+        warnings.warn(
+            "Data loss - max PHRED quality 62 in Illumina FASTQ", BiopythonWarning
+        )
+    # This will apply the truncation at 62, giving max ASCII 126
+    return "".join(
+        chr(min(126, int(round(phred_quality_from_solexa(qs))) + SOLEXA_SCORE_OFFSET))
+        for qs in qualities
+    )
+
+
+# Only map -5 to 62, we need to give a warning on truncating at 62
+assert 62 + SOLEXA_SCORE_OFFSET == 126
+_solexa_to_solexa_quality_str = {
+    qs: chr(min(126, qs + SOLEXA_SCORE_OFFSET)) for qs in range(-5, 62 + 1)
+}
+# Only map 0 to 62, we need to give a warning on truncating at 62
+_phred_to_solexa_quality_str = {
+    qp: chr(min(126, int(round(solexa_quality_from_phred(qp))) + SOLEXA_SCORE_OFFSET))
+    for qp in range(0, 62 + 1)
+}
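+# For example, Solexa -5 maps directly to ";" (ASCII 59) and Solexa 62 to "~"
+# (ASCII 126); PHRED 0 converts to the Solexa minimum of -5 and so also gives ";".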
+
+
+def _get_solexa_quality_str(record):
+    """Return a Solexa FASTQ encoded quality string (PRIVATE).
+
+    Notice that due to the limited range of printable ASCII characters, a
+    Solexa quality of 62 is the maximum that can be held in a Solexa FASTQ
+    file (using ASCII 126, the tilde). This function will issue a warning
+    in this situation.
+    """
+    # TODO - This function works and is fast, but it is also ugly
+    # and there is considerable repetition of code for the other
+    # two FASTQ variants.
+    try:
+        # These take priority (in case both Solexa and PHRED scores found)
+        qualities = record.letter_annotations["solexa_quality"]
+    except KeyError:
+        # Fall back on PHRED scores...
+        pass
+    else:
+        # Try and use the precomputed mapping:
+        try:
+            return "".join(_solexa_to_solexa_quality_str[qs] for qs in qualities)
+        except KeyError:
+            # Could be a float, or a None in the list, or a high value.
+            pass
+        if None in qualities:
+            raise TypeError("A quality value of None was found")
+        if max(qualities) >= 62.5:
+            warnings.warn(
+                "Data loss - max Solexa quality 62 in Solexa FASTQ", BiopythonWarning
+            )
+        # This will apply the truncation at 62, giving max ASCII 126
+        return "".join(
+            chr(min(126, int(round(qs)) + SOLEXA_SCORE_OFFSET)) for qs in qualities
+        )
+    # Fall back on the PHRED scores...
+    try:
+        qualities = record.letter_annotations["phred_quality"]
+    except KeyError:
+        raise ValueError(
+            "No suitable quality scores found in "
+            "letter_annotations of SeqRecord (id=%s)." % record.id
+        ) from None
+    # Try and use the precomputed mapping:
+    try:
+        return "".join(_phred_to_solexa_quality_str[qp] for qp in qualities)
+    except KeyError:
+        # Could be a float, a None in the list, or a value too big
+        # to be in the cache.
+        pass
+    if None in qualities:
+        raise TypeError("A quality value of None was found")
+    # Must do this the slow way, first converting the PHRED scores into
+    # Solexa scores:
+    if max(qualities) >= 62.5:
+        warnings.warn(
+            "Data loss - max Solexa quality 62 in Solexa FASTQ", BiopythonWarning
+        )
+    return "".join(
+        chr(min(126, int(round(solexa_quality_from_phred(qp))) + SOLEXA_SCORE_OFFSET))
+        for qp in qualities
+    )
+
+
+# TODO - Default to nucleotide or even DNA?
+def FastqGeneralIterator(source):
+    """Iterate over Fastq records as string tuples (not as SeqRecord objects).
+
+    Arguments:
+     - source - input stream opened in text mode, or a path to a file
+
+    This code does not try to interpret the quality string numerically.  It
+    just returns tuples of the title, sequence and quality as strings.  For
+    the sequence and quality, any whitespace (such as new lines) is removed.
+
+    Our SeqRecord based FASTQ iterators call this function internally, and then
+    turn the strings into SeqRecord objects, mapping the quality string into
+    a list of numerical scores.  If you want to do a custom quality mapping,
+    then you might consider calling this function directly.
+
+    For parsing FASTQ files, the title string from the "@" line at the start
+    of each record can optionally be omitted on the "+" lines.  If it is
+    repeated, it must be identical.
+
+    The sequence string and the quality string can optionally be split over
+    multiple lines, although several sources discourage this.  In comparison,
+    for the FASTA file format, line breaks every 60 to 80 characters are
+    the norm.
+
+    **WARNING** - Because the "@" character can appear in the quality string,
+    this can cause problems as this is also the marker for the start of
+    a new sequence.  In fact, the "+" sign can appear as well.  Some sources
+    recommend having no line breaks in the quality string to avoid this, but
+    even that is not enough; consider this example::
+
+        @071113_EAS56_0053:1:1:998:236
+        TTTCTTGCCCCCATAGACTGAGACCTTCCCTAAATA
+        +071113_EAS56_0053:1:1:998:236
+        IIIIIIIIIIIIIIIIIIIIIIIIIIIIICII+III
+        @071113_EAS56_0053:1:1:182:712
+        ACCCAGCTAATTTTTGTATTTTTGTTAGAGACAGTG
+        +
+        @IIIIIIIIIIIIIIICDIIIII<%<6&-*).(*%+
+        @071113_EAS56_0053:1:1:153:10
+        TGTTCTGAAGGAAGGTGTGCGTGCGTGTGTGTGTGT
+        +
+        IIIIIIIIIIIICIIGIIIII>IAIIIE65I=II:6
+        @071113_EAS56_0053:1:3:990:501
+        TGGGAGGTTTTATGTGGA
+        AAGCAGCAATGTACAAGA
+        +
+        IIIIIII.IIIIII1@44
+        @-7.%<&+/$/%4(++(%
+
+    This is four PHRED encoded FASTQ entries originally from an NCBI source
+    (given the read length of 36, these are probably Solexa Illumina reads where
+    the quality has been mapped onto the PHRED values).
+
+    This example has been edited to illustrate some of the nasty things allowed
+    in the FASTQ format.  Firstly, on the "+" lines most but not all of the
+    (redundant) identifiers are omitted.  In real files it is likely that all or
+    none of these extra identifiers will be present.
+
+    Secondly, while the first three sequences have been shown without line
+    breaks, the last has been split over multiple lines.  In real files any line
+    breaks are likely to be consistent.
+
+    Thirdly, some of the quality string lines start with an "@" character.  For
+    the second record this is unavoidable.  However for the fourth sequence this
+    only happens because its quality string is split over two lines.  A naive
+    parser could wrongly treat any line starting with an "@" as the beginning of
+    a new sequence!  This code copes with this possible ambiguity by keeping
+    track of the length of the sequence which gives the expected length of the
+    quality string.
+
+    Using this tricky example file as input, this short bit of code demonstrates
+    what this parsing function would return:
+
+    >>> with open("Quality/tricky.fastq") as handle:
+    ...     for (title, sequence, quality) in FastqGeneralIterator(handle):
+    ...         print(title)
+    ...         print("%s %s" % (sequence, quality))
+    ...
+    071113_EAS56_0053:1:1:998:236
+    TTTCTTGCCCCCATAGACTGAGACCTTCCCTAAATA IIIIIIIIIIIIIIIIIIIIIIIIIIIIICII+III
+    071113_EAS56_0053:1:1:182:712
+    ACCCAGCTAATTTTTGTATTTTTGTTAGAGACAGTG @IIIIIIIIIIIIIIICDIIIII<%<6&-*).(*%+
+    071113_EAS56_0053:1:1:153:10
+    TGTTCTGAAGGAAGGTGTGCGTGCGTGTGTGTGTGT IIIIIIIIIIIICIIGIIIII>IAIIIE65I=II:6
+    071113_EAS56_0053:1:3:990:501
+    TGGGAGGTTTTATGTGGAAAGCAGCAATGTACAAGA IIIIIII.IIIIII1@44@-7.%<&+/$/%4(++(%
+
+    Finally we note that some sources state that the quality string should
+    start with "!" (which using the PHRED mapping means the first letter always
+    has a quality score of zero).  This rather restrictive rule is not widely
+    observed, and is therefore ignored here.  One plus point about this "!" rule
+    is that (provided there are no line breaks in the quality sequence) it
+    would prevent the above problem with the "@" character.
+    """
+    try:
+        handle = open(source)
+    except TypeError:
+        handle = source
+        if handle.read(0) != "":
+            raise StreamModeError("Fastq files must be opened in text mode") from None
+    try:
+        try:
+            line = next(handle)
+        except StopIteration:
+            return  # Premature end of file, or just empty?
+
+        while True:
+            if line[0] != "@":
+                raise ValueError(
+                    "Records in Fastq files should start with '@' character"
+                )
+            title_line = line[1:].rstrip()
+            seq_string = ""
+            # There will now be one or more sequence lines; keep going until we
+            # find the "+" marking the quality line:
+            for line in handle:
+                if line[0] == "+":
+                    break
+                seq_string += line.rstrip()
+            else:
+                if seq_string:
+                    raise ValueError("End of file without quality information.")
+                else:
+                    raise ValueError("Unexpected end of file")
+            # The title here is optional, but if present must match!
+            second_title = line[1:].rstrip()
+            if second_title and second_title != title_line:
+                raise ValueError("Sequence and quality captions differ.")
+            # This is going to slow things down a little, but assuming
+            # this isn't allowed we should try and catch it here:
+            if " " in seq_string or "\t" in seq_string:
+                raise ValueError("Whitespace is not allowed in the sequence.")
+            seq_len = len(seq_string)
+
+            # There will now be at least one line of quality data, followed by
+            # another sequence, or EOF
+            line = None
+            quality_string = ""
+            for line in handle:
+                if line[0] == "@":
+                    # This COULD be the start of a new sequence. However, it MAY just
+                    # be a line of quality data which starts with a "@" character.  We
+                    # should be able to check this by looking at the sequence length
+                    # and the amount of quality data found so far.
+                    if len(quality_string) >= seq_len:
+                        # We expect it to be equal if this is the start of a new record.
+                        # If the quality data is longer, we'll raise an error below.
+                        break
+                    # Continue - it's just some (more) quality data.
+                quality_string += line.rstrip()
+            else:
+                if line is None:
+                    raise ValueError("Unexpected end of file")
+                line = None
+
+            if seq_len != len(quality_string):
+                raise ValueError(
+                    "Lengths of sequence and quality values differs for %s (%i and %i)."
+                    % (title_line, seq_len, len(quality_string))
+                )
+
+            # Return the record and then continue...
+            yield (title_line, seq_string, quality_string)
+
+            if line is None:
+                break
+    finally:
+        if handle is not source:
+            handle.close()
+
+
+class FastqPhredIterator(SequenceIterator):
+    """Parser for FASTQ files."""
+
+    def __init__(self, source, alphabet=None, title2ids=None):
+        """Iterate over FASTQ records as SeqRecord objects.
+
+        Arguments:
+         - source - input stream opened in text mode, or a path to a file
+         - alphabet - optional alphabet, no longer used. Leave as None.
+         - title2ids - A function that, when given the title line from the FASTQ
+           file (without the beginning "@"), will return the id, name and
+           description (in that order) for the record as a tuple of strings.
+           If this is not given, then the entire title line will be used as
+           the description, and the first word as the id and name.
+
+        Note that use of title2ids matches that of Bio.SeqIO.FastaIO.
+
+        For each sequence in a (Sanger style) FASTQ file there is a matching string
+        encoding the PHRED qualities (integers between 0 and about 90) using ASCII
+        values with an offset of 33.
+
+        For example, consider a file containing three short reads::
+
+            @EAS54_6_R1_2_1_413_324
+            CCCTTCTTGTCTTCAGCGTTTCTCC
+            +
+            ;;3;;;;;;;;;;;;7;;;;;;;88
+            @EAS54_6_R1_2_1_540_792
+            TTGGCAGGCCAAGGCCGATGGATCA
+            +
+            ;;;;;;;;;;;7;;;;;-;;;3;83
+            @EAS54_6_R1_2_1_443_348
+            GTTGCTTCTGGCGTGGGTGGGGGGG
+            +
+            ;;;;;;;;;;;9;7;;.7;393333
+
+        For each sequence (e.g. "CCCTTCTTGTCTTCAGCGTTTCTCC") there is a matching
+        string encoding the PHRED qualities using ASCII values with an offset of
+        33 (e.g. ";;3;;;;;;;;;;;;7;;;;;;;88").
+
+        Using this module directly you might run:
+
+        >>> with open("Quality/example.fastq") as handle:
+        ...     for record in FastqPhredIterator(handle):
+        ...         print("%s %s" % (record.id, record.seq))
+        EAS54_6_R1_2_1_413_324 CCCTTCTTGTCTTCAGCGTTTCTCC
+        EAS54_6_R1_2_1_540_792 TTGGCAGGCCAAGGCCGATGGATCA
+        EAS54_6_R1_2_1_443_348 GTTGCTTCTGGCGTGGGTGGGGGGG
+
+        Typically however, you would call this via Bio.SeqIO instead with "fastq"
+        (or "fastq-sanger") as the format:
+
+        >>> from Bio import SeqIO
+        >>> with open("Quality/example.fastq") as handle:
+        ...     for record in SeqIO.parse(handle, "fastq"):
+        ...         print("%s %s" % (record.id, record.seq))
+        EAS54_6_R1_2_1_413_324 CCCTTCTTGTCTTCAGCGTTTCTCC
+        EAS54_6_R1_2_1_540_792 TTGGCAGGCCAAGGCCGATGGATCA
+        EAS54_6_R1_2_1_443_348 GTTGCTTCTGGCGTGGGTGGGGGGG
+
+        If you want to look at the qualities, they are recorded in each record's
+        per-letter-annotation dictionary as a simple list of integers:
+
+        >>> print(record.letter_annotations["phred_quality"])
+        [26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 24, 26, 22, 26, 26, 13, 22, 26, 18, 24, 18, 18, 18, 18]
+
+        """
+        if alphabet is not None:
+            raise ValueError("The alphabet argument is no longer supported")
+        self.title2ids = title2ids
+        super().__init__(source, mode="t", fmt="Fastq")
+
+    def parse(self, handle):
+        """Start parsing the file, and return a SeqRecord generator."""
+        return self.iterate(handle)
+
+    def iterate(self, handle):
+        """Parse the file and generate SeqRecord objects."""
+        title2ids = self.title2ids
+        assert SANGER_SCORE_OFFSET == ord("!")
+        # Originally, I used a list expression for each record:
+        #
+        # qualities = [ord(letter)-SANGER_SCORE_OFFSET for letter in quality_string]
+        #
+        # Precomputing is faster, perhaps partly by avoiding the subtractions.
+        q_mapping = {
+            chr(letter): letter - SANGER_SCORE_OFFSET
+            for letter in range(SANGER_SCORE_OFFSET, 94 + SANGER_SCORE_OFFSET)
+        }
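+        # For example, q_mapping["!"] == 0 and q_mapping["~"] == 93.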
+
+        for title_line, seq_string, quality_string in FastqGeneralIterator(handle):
+            if title2ids:
+                id, name, descr = title2ids(title_line)
+            else:
+                descr = title_line
+                id = descr.split()[0]
+                name = id
+            record = SeqRecord(Seq(seq_string), id=id, name=name, description=descr)
+            try:
+                qualities = [q_mapping[letter] for letter in quality_string]
+            except KeyError:
+                raise ValueError("Invalid character in quality string") from None
+            # For speed, will now use a dirty trick to speed up assigning the
+            # qualities. We do this to bypass the length check imposed by the
+            # per-letter-annotations restricted dict (as this has already been
+            # checked by FastqGeneralIterator). This is equivalent to:
+            # record.letter_annotations["phred_quality"] = qualities
+            dict.__setitem__(record._per_letter_annotations, "phred_quality", qualities)
+            yield record
+
+
+def FastqSolexaIterator(source, alphabet=None, title2ids=None):
+    r"""Parse old Solexa/Illumina FASTQ like files (which differ in the quality mapping).
+
+    The optional arguments are the same as those for the FastqPhredIterator.
+
+    For each sequence in Solexa/Illumina FASTQ files there is a matching string
+    encoding the Solexa integer qualities using ASCII values with an offset
+    of 64.  Solexa scores are scaled differently to PHRED scores, and Biopython
+    will NOT perform any automatic conversion when loading.
+
+    NOTE - This file format is used by the OLD versions of the Solexa/Illumina
+    pipeline. See also the FastqIlluminaIterator function for the NEW version.
+
+    For example, consider a file containing these five records::
+
+        @SLXA-B3_649_FC8437_R1_1_1_610_79
+        GATGTGCAATACCTTTGTAGAGGAA
+        +SLXA-B3_649_FC8437_R1_1_1_610_79
+        YYYYYYYYYYYYYYYYYYWYWYYSU
+        @SLXA-B3_649_FC8437_R1_1_1_397_389
+        GGTTTGAGAAAGAGAAATGAGATAA
+        +SLXA-B3_649_FC8437_R1_1_1_397_389
+        YYYYYYYYYWYYYYWWYYYWYWYWW
+        @SLXA-B3_649_FC8437_R1_1_1_850_123
+        GAGGGTGTTGATCATGATGATGGCG
+        +SLXA-B3_649_FC8437_R1_1_1_850_123
+        YYYYYYYYYYYYYWYYWYYSYYYSY
+        @SLXA-B3_649_FC8437_R1_1_1_362_549
+        GGAAACAAAGTTTTTCTCAACATAG
+        +SLXA-B3_649_FC8437_R1_1_1_362_549
+        YYYYYYYYYYYYYYYYYYWWWWYWY
+        @SLXA-B3_649_FC8437_R1_1_1_183_714
+        GTATTATTTAATGGCATACACTCAA
+        +SLXA-B3_649_FC8437_R1_1_1_183_714
+        YYYYYYYYYYWYYYYWYWWUWWWQQ
+
+    Using this module directly you might run:
+
+    >>> with open("Quality/solexa_example.fastq") as handle:
+    ...     for record in FastqSolexaIterator(handle):
+    ...         print("%s %s" % (record.id, record.seq))
+    SLXA-B3_649_FC8437_R1_1_1_610_79 GATGTGCAATACCTTTGTAGAGGAA
+    SLXA-B3_649_FC8437_R1_1_1_397_389 GGTTTGAGAAAGAGAAATGAGATAA
+    SLXA-B3_649_FC8437_R1_1_1_850_123 GAGGGTGTTGATCATGATGATGGCG
+    SLXA-B3_649_FC8437_R1_1_1_362_549 GGAAACAAAGTTTTTCTCAACATAG
+    SLXA-B3_649_FC8437_R1_1_1_183_714 GTATTATTTAATGGCATACACTCAA
+
+    Typically however, you would call this via Bio.SeqIO instead with
+    "fastq-solexa" as the format:
+
+    >>> from Bio import SeqIO
+    >>> with open("Quality/solexa_example.fastq") as handle:
+    ...     for record in SeqIO.parse(handle, "fastq-solexa"):
+    ...         print("%s %s" % (record.id, record.seq))
+    SLXA-B3_649_FC8437_R1_1_1_610_79 GATGTGCAATACCTTTGTAGAGGAA
+    SLXA-B3_649_FC8437_R1_1_1_397_389 GGTTTGAGAAAGAGAAATGAGATAA
+    SLXA-B3_649_FC8437_R1_1_1_850_123 GAGGGTGTTGATCATGATGATGGCG
+    SLXA-B3_649_FC8437_R1_1_1_362_549 GGAAACAAAGTTTTTCTCAACATAG
+    SLXA-B3_649_FC8437_R1_1_1_183_714 GTATTATTTAATGGCATACACTCAA
+
+    If you want to look at the qualities, they are recorded in each record's
+    per-letter-annotation dictionary as a simple list of integers:
+
+    >>> print(record.letter_annotations["solexa_quality"])
+    [25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 23, 25, 25, 25, 25, 23, 25, 23, 23, 21, 23, 23, 23, 17, 17]
+
+    These scores aren't very good, but they are high enough that they map
+    almost exactly onto PHRED scores:
+
+    >>> print("%0.2f" % phred_quality_from_solexa(25))
+    25.01
+
+    Let's look at a faked example read which is even worse, where there are
+    more noticeable differences between the Solexa and PHRED scores::
+
+         @slxa_0001_1_0001_01
+         ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTNNNNNN
+         +slxa_0001_1_0001_01
+         hgfedcba`_^]\[ZYXWVUTSRQPONMLKJIHGFEDCBA@?>=<;
+
+    Again, you would typically use Bio.SeqIO to read this file in (rather than
+    calling the Bio.SeqIO.QualityIO module directly).  Most FASTQ files will
+    contain thousands of reads, so you would normally use Bio.SeqIO.parse()
+    as shown above.  This example has only one entry, so instead we can
+    use the Bio.SeqIO.read() function:
+
+    >>> from Bio import SeqIO
+    >>> with open("Quality/solexa_faked.fastq") as handle:
+    ...     record = SeqIO.read(handle, "fastq-solexa")
+    >>> print("%s %s" % (record.id, record.seq))
+    slxa_0001_1_0001_01 ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTNNNNNN
+    >>> print(record.letter_annotations["solexa_quality"])
+    [40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, -1, -2, -3, -4, -5]
+
+    These quality scores are so low that when converted from the Solexa scheme
+    into PHRED scores they look quite different:
+
+    >>> print("%0.2f" % phred_quality_from_solexa(-1))
+    2.54
+    >>> print("%0.2f" % phred_quality_from_solexa(-5))
+    1.19
+
+    Note you can use the Bio.SeqIO.write() function or the SeqRecord's format
+    method to output the record(s):
+
+    >>> print(record.format("fastq-solexa"))
+    @slxa_0001_1_0001_01
+    ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTNNNNNN
+    +
+    hgfedcba`_^]\[ZYXWVUTSRQPONMLKJIHGFEDCBA@?>=<;
+    
+
+    Note this output is slightly different from the input file as Biopython
+    has left out the optional repetition of the sequence identifier on the "+"
+    line.  If you want to use PHRED scores, use "fastq" or "qual" as the
+    output format instead, and Biopython will do the conversion for you:
+
+    >>> print(record.format("fastq"))
+    @slxa_0001_1_0001_01
+    ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTNNNNNN
+    +
+    IHGFEDCBA@?>=<;:9876543210/.-,++*)('&&%%$$##""
+    
+
+    >>> print(record.format("qual"))
+    >slxa_0001_1_0001_01
+    40 39 38 37 36 35 34 33 32 31 30 29 28 27 26 25 24 23 22 21
+    20 19 18 17 16 15 14 13 12 11 10 10 9 8 7 6 5 5 4 4 3 3 2 2
+    1 1
+    
+
+    As shown above, the poor quality Solexa reads have been mapped to the
+    equivalent PHRED score (e.g. -5 to 1 as shown earlier).
+    """
+    if alphabet is not None:
+        raise ValueError("The alphabet argument is no longer supported")
+
+    q_mapping = {
+        chr(letter): letter - SOLEXA_SCORE_OFFSET
+        for letter in range(SOLEXA_SCORE_OFFSET - 5, 63 + SOLEXA_SCORE_OFFSET)
+    }
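+    # For example, q_mapping[";"] == -5 and q_mapping["~"] == 62.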
+
+    for title_line, seq_string, quality_string in FastqGeneralIterator(source):
+        if title2ids:
+            id, name, descr = title2ids(title_line)
+        else:
+            descr = title_line
+            id = descr.split()[0]
+            name = id
+        record = SeqRecord(Seq(seq_string), id=id, name=name, description=descr)
+        try:
+            qualities = [q_mapping[letter] for letter in quality_string]
+        # DO NOT convert these into PHRED qualities automatically!
+        except KeyError:
+            raise ValueError("Invalid character in quality string") from None
+        # Dirty trick to speed up this line:
+        # record.letter_annotations["solexa_quality"] = qualities
+        dict.__setitem__(record._per_letter_annotations, "solexa_quality", qualities)
+        yield record
+
+
+def FastqIlluminaIterator(source, alphabet=None, title2ids=None):
+    """Parse Illumina 1.3 to 1.7 FASTQ like files (which differ in the quality mapping).
+
+    The optional arguments are the same as those for the FastqPhredIterator.
+
+    For each sequence in Illumina 1.3+ FASTQ files there is a matching string
+    encoding PHRED integer qualities using ASCII values with an offset of 64.
+
+    >>> from Bio import SeqIO
+    >>> record = SeqIO.read("Quality/illumina_faked.fastq", "fastq-illumina")
+    >>> print("%s %s" % (record.id, record.seq))
+    Test ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTN
+    >>> max(record.letter_annotations["phred_quality"])
+    40
+    >>> min(record.letter_annotations["phred_quality"])
+    0
+
+    NOTE - Older versions of the Solexa/Illumina pipeline encoded Solexa scores
+    with an ASCII offset of 64. Solexa and PHRED scores are approximately equal,
+    but only for high quality reads. If you have an old Solexa/Illumina file
+    with negative Solexa scores and try to read it as an Illumina 1.3+ file,
+    it will fail:
+
+    >>> record2 = SeqIO.read("Quality/solexa_faked.fastq", "fastq-illumina")
+    Traceback (most recent call last):
+       ...
+    ValueError: Invalid character in quality string
+
+    NOTE - True Sanger style FASTQ files use PHRED scores with an offset of 33.
+    """
+    if alphabet is not None:
+        raise ValueError("The alphabet argument is no longer supported")
+
+    q_mapping = {
+        chr(letter): letter - SOLEXA_SCORE_OFFSET
+        for letter in range(SOLEXA_SCORE_OFFSET, 63 + SOLEXA_SCORE_OFFSET)
+    }
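+    # For example, q_mapping["@"] == 0 and q_mapping["~"] == 62.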
+
+    for title_line, seq_string, quality_string in FastqGeneralIterator(source):
+        if title2ids:
+            id, name, descr = title2ids(title_line)
+        else:
+            descr = title_line
+            id = descr.split()[0]
+            name = id
+        record = SeqRecord(Seq(seq_string), id=id, name=name, description=descr)
+        try:
+            qualities = [q_mapping[letter] for letter in quality_string]
+        except KeyError:
+            raise ValueError("Invalid character in quality string") from None
+        # Dirty trick to speed up this line:
+        # record.letter_annotations["phred_quality"] = qualities
+        dict.__setitem__(record._per_letter_annotations, "phred_quality", qualities)
+        yield record
+
+
+class QualPhredIterator(SequenceIterator):
+    """Parser for QUAL files with PHRED quality scores but no sequence."""
+
+    def __init__(self, source, alphabet=None, title2ids=None):
+        """For QUAL files which include PHRED quality scores, but no sequence.
+
+        For example, consider this short QUAL file::
+
+            >EAS54_6_R1_2_1_413_324
+            26 26 18 26 26 26 26 26 26 26 26 26 26 26 26 22 26 26 26 26
+            26 26 26 23 23
+            >EAS54_6_R1_2_1_540_792
+            26 26 26 26 26 26 26 26 26 26 26 22 26 26 26 26 26 12 26 26
+            26 18 26 23 18
+            >EAS54_6_R1_2_1_443_348
+            26 26 26 26 26 26 26 26 26 26 26 24 26 22 26 26 13 22 26 18
+            24 18 18 18 18
+
+        Using this module directly you might run:
+
+        >>> with open("Quality/example.qual") as handle:
+        ...     for record in QualPhredIterator(handle):
+        ...         print("%s read of length %d" % (record.id, len(record.seq)))
+        EAS54_6_R1_2_1_413_324 read of length 25
+        EAS54_6_R1_2_1_540_792 read of length 25
+        EAS54_6_R1_2_1_443_348 read of length 25
+
+        Typically however, you would call this via Bio.SeqIO instead with "qual"
+        as the format:
+
+        >>> from Bio import SeqIO
+        >>> with open("Quality/example.qual") as handle:
+        ...     for record in SeqIO.parse(handle, "qual"):
+        ...         print("%s read of length %d" % (record.id, len(record.seq)))
+        EAS54_6_R1_2_1_413_324 read of length 25
+        EAS54_6_R1_2_1_540_792 read of length 25
+        EAS54_6_R1_2_1_443_348 read of length 25
+
+        Only the sequence length is known, as the QUAL file does not contain
+        the sequence string itself.
+
+        The quality scores themselves are available as a list of integers
+        in each record's per-letter-annotation:
+
+        >>> print(record.letter_annotations["phred_quality"])
+        [26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 24, 26, 22, 26, 26, 13, 22, 26, 18, 24, 18, 18, 18, 18]
+
+        You can still slice one of these SeqRecord objects:
+
+        >>> sub_record = record[5:10]
+        >>> print("%s %s" % (sub_record.id, sub_record.letter_annotations["phred_quality"]))
+        EAS54_6_R1_2_1_443_348 [26, 26, 26, 26, 26]
+
+        As of Biopython 1.59, this parser will accept files with negative quality
+        scores, but will replace them with the lowest possible PHRED score of zero.
+        This triggers a warning; previously a ValueError exception was raised.
+        """
+        if alphabet is not None:
+            raise ValueError("The alphabet argument is no longer supported")
+        self.title2ids = title2ids
+        super().__init__(source, mode="t", fmt="QUAL")
+
+    def parse(self, handle):
+        """Start parsing the file, and return a SeqRecord generator."""
+        return self.iterate(handle)
+
+    def iterate(self, handle):
+        """Parse the file and generate SeqRecord objects."""
+        title2ids = self.title2ids
+        # Skip any text before the first record (e.g. blank lines, comments)
+        for line in handle:
+            if line[0] == ">":
+                break
+        else:
+            return
+
+        while True:
+            if line[0] != ">":
+                raise ValueError(
+                    "Records in Fasta files should start with '>' character"
+                )
+            if title2ids:
+                id, name, descr = title2ids(line[1:].rstrip())
+            else:
+                descr = line[1:].rstrip()
+                id = descr.split()[0]
+                name = id
+
+            qualities = []
+            for line in handle:
+                if line[0] == ">":
+                    break
+                qualities.extend(int(word) for word in line.split())
+            else:
+                line = None
+
+            if qualities and min(qualities) < 0:
+                warnings.warn(
+                    "Negative quality score %i found, substituting PHRED zero instead."
+                    % min(qualities),
+                    BiopythonParserWarning,
+                )
+                qualities = [max(0, q) for q in qualities]
+
+            # Return the record and then continue...
+            sequence = Seq(None, length=len(qualities))
+            record = SeqRecord(sequence, id=id, name=name, description=descr)
+            # Dirty trick to speed up this line:
+            # record.letter_annotations["phred_quality"] = qualities
+            dict.__setitem__(record._per_letter_annotations, "phred_quality", qualities)
+            yield record
+
+            if line is None:
+                return  # StopIteration
+        raise ValueError("Unrecognised QUAL record format.")
+
+
+class FastqPhredWriter(SequenceWriter):
+    """Class to write standard FASTQ format files (using PHRED quality scores) (OBSOLETE).
+
+    Although you can use this class directly, you are strongly encouraged
+    to use the ``as_fastq`` function, or top level ``Bio.SeqIO.write()``
+    function instead via the format name "fastq" or the alias "fastq-sanger".
+
+    For example, this code reads in a standard Sanger style FASTQ file
+    (using PHRED scores) and re-saves it as another Sanger style FASTQ file:
+
+    >>> from Bio import SeqIO
+    >>> record_iterator = SeqIO.parse("Quality/example.fastq", "fastq")
+    >>> with open("Quality/temp.fastq", "w") as out_handle:
+    ...     SeqIO.write(record_iterator, out_handle, "fastq")
+    3
+
+    You might want to do this if the original file included extra line breaks,
+    which while valid may not be supported by all tools.  The output file from
+    Biopython will have each sequence on a single line, and each quality
+    string on a single line (which is considered desirable for maximum
+    compatibility).
+
+    In this next example, an old style Solexa/Illumina FASTQ file (using Solexa
+    quality scores) is converted into a standard Sanger style FASTQ file using
+    PHRED qualities:
+
+    >>> from Bio import SeqIO
+    >>> record_iterator = SeqIO.parse("Quality/solexa_example.fastq", "fastq-solexa")
+    >>> with open("Quality/temp.fastq", "w") as out_handle:
+    ...     SeqIO.write(record_iterator, out_handle, "fastq")
+    5
+
+    This code is also called if you use the .format("fastq") method of a
+    SeqRecord, or .format("fastq-sanger") if you prefer that alias.
+
+    Note that Sanger FASTQ files have an upper limit of PHRED quality 93, which is
+    encoded as ASCII 126, the tilde. If your quality scores are truncated to fit, a
+    warning is issued.
+
+    P.S. To avoid cluttering up your working directory, you can delete this
+    temporary file now:
+
+    >>> import os
+    >>> os.remove("Quality/temp.fastq")
+    """
+
+    assert SANGER_SCORE_OFFSET == ord("!")
+
+    def write_record(self, record):
+        """Write a single FASTQ record to the file."""
+        assert self._header_written
+        assert not self._footer_written
+        self._record_written = True
+        # TODO - Is an empty sequence allowed in FASTQ format?
+        seq = record.seq
+        if seq is None:
+            raise ValueError("No sequence for record %s" % record.id)
+        qualities_str = _get_sanger_quality_str(record)
+        if len(qualities_str) != len(seq):
+            raise ValueError(
+                "Record %s has sequence length %i but %i quality scores"
+                % (record.id, len(seq), len(qualities_str))
+            )
+
+        # FASTQ files can include a description, just like FASTA files
+        # (at least, this is what the NCBI Short Read Archive does)
+        id = self.clean(record.id)
+        description = self.clean(record.description)
+        if description and description.split(None, 1)[0] == id:
+            # The description includes the id at the start
+            title = description
+        elif description:
+            title = "%s %s" % (id, description)
+        else:
+            title = id
+
+        self.handle.write("@%s\n%s\n+\n%s\n" % (title, seq, qualities_str))
+
+
+def as_fastq(record):
+    """Turn a SeqRecord into a Sanger FASTQ formatted string.
+
+    This is used internally by the SeqRecord's .format("fastq")
+    method and by the SeqIO.write(..., ..., "fastq") function,
+    and under the format alias "fastq-sanger" as well.
+    """
+    seq_str = _get_seq_string(record)
+    qualities_str = _get_sanger_quality_str(record)
+    if len(qualities_str) != len(seq_str):
+        raise ValueError(
+            "Record %s has sequence length %i but %i quality scores"
+            % (record.id, len(seq_str), len(qualities_str))
+        )
+    id = _clean(record.id)
+    description = _clean(record.description)
+    if description and description.split(None, 1)[0] == id:
+        title = description
+    elif description:
+        title = "%s %s" % (id, description)
+    else:
+        title = id
+    return "@%s\n%s\n+\n%s\n" % (title, seq_str, qualities_str)
+
+
+class QualPhredWriter(SequenceWriter):
+    """Class to write QUAL format files (using PHRED quality scores) (OBSOLETE).
+
+    Although you can use this class directly, you are strongly encouraged
+    to use the ``as_qual`` function, or top level ``Bio.SeqIO.write()``
+    function instead.
+
+    For example, this code reads in a FASTQ file and saves the quality scores
+    into a QUAL file:
+
+    >>> from Bio import SeqIO
+    >>> record_iterator = SeqIO.parse("Quality/example.fastq", "fastq")
+    >>> with open("Quality/temp.qual", "w") as out_handle:
+    ...     SeqIO.write(record_iterator, out_handle, "qual")
+    3
+
+    This code is also called if you use the .format("qual") method of a
+    SeqRecord.
+
+    P.S. Don't forget to clean up the temp file if you don't need it anymore:
+
+    >>> import os
+    >>> os.remove("Quality/temp.qual")
+    """
+
+    def __init__(self, handle, wrap=60, record2title=None):
+        """Create a QUAL writer.
+
+        Arguments:
+         - handle - Handle to an output file, e.g. as returned
+           by open(filename, "w")
+         - wrap   - Optional line length used to wrap sequence lines.
+           Defaults to wrapping the sequence at 60 characters. Use
+           zero (or None) for no wrapping, giving a single long line
+           for the sequence.
+         - record2title - Optional function to return the text to be
+           used for the title line of each record.  By default a
+           combination of the record.id and record.description is
+           used.  If the record.description starts with the record.id,
+           then just the record.description is used.
+
+        The record2title argument is present for consistency with the
+        Bio.SeqIO.FastaIO writer class.
+        """
+        super().__init__(handle)
+        if wrap:
+            if wrap < 1:
+                raise ValueError("wrap must be None, zero, or a positive integer")
+        self.wrap = wrap
+        self.record2title = record2title
+
+    def write_record(self, record):
+        """Write a single QUAL record to the file."""
+        assert self._header_written
+        assert not self._footer_written
+        self._record_written = True
+
+        handle = self.handle
+        wrap = self.wrap
+
+        if self.record2title:
+            title = self.clean(self.record2title(record))
+        else:
+            id = self.clean(record.id)
+            description = self.clean(record.description)
+            if description and description.split(None, 1)[0] == id:
+                # The description includes the id at the start
+                title = description
+            elif description:
+                title = "%s %s" % (id, description)
+            else:
+                title = id
+        handle.write(">%s\n" % title)
+
+        qualities = _get_phred_quality(record)
+        try:
+            # This rounds to the nearest integer.
+            # TODO - can we record a float in a qual file?
+            qualities_strs = [("%i" % round(q, 0)) for q in qualities]
+        except TypeError:
+            if None in qualities:
+                raise TypeError("A quality value of None was found") from None
+            else:
+                raise
+
+        if wrap and wrap > 5:
+            # Fast wrapping (the extra "wrap and" guards against wrap=None)
+            data = " ".join(qualities_strs)
+            while True:
+                if len(data) <= wrap:
+                    self.handle.write(data + "\n")
+                    break
+                else:
+                    # By construction there must be spaces in the first X chars
+                    # (unless we have X digit or higher quality scores!)
+                    i = data.rfind(" ", 0, wrap)
+                    handle.write(data[:i] + "\n")
+                    data = data[i + 1 :]
+        elif wrap:
+            # Safe wrapping
+            while qualities_strs:
+                line = qualities_strs.pop(0)
+                while qualities_strs and len(line) + 1 + len(qualities_strs[0]) < wrap:
+                    line += " " + qualities_strs.pop(0)
+                handle.write(line + "\n")
+        else:
+            # No wrapping
+            data = " ".join(qualities_strs)
+            handle.write(data + "\n")
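+        # For example, with wrap=10 the scores [26, 26, 26, 26] give the data
+        # string "26 26 26 26"; the fast path splits at the last space before
+        # column 10, writing "26 26 26" then "26".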
+
+
+def as_qual(record):
+    """Turn a SeqRecord into a QUAL formatted string.
+
+    This is used internally by the SeqRecord's .format("qual")
+    method and by the SeqIO.write(..., ..., "qual") function.
+    """
+    id = _clean(record.id)
+    description = _clean(record.description)
+    if description and description.split(None, 1)[0] == id:
+        title = description
+    elif description:
+        title = "%s %s" % (id, description)
+    else:
+        title = id
+    lines = [">%s\n" % title]
+
+    qualities = _get_phred_quality(record)
+    try:
+        # This rounds to the nearest integer.
+        # TODO - can we record a float in a qual file?
+        qualities_strs = [("%i" % round(q, 0)) for q in qualities]
+    except TypeError:
+        if None in qualities:
+            raise TypeError("A quality value of None was found") from None
+        else:
+            raise
+
+    # Safe wrapping
+    while qualities_strs:
+        line = qualities_strs.pop(0)
+        while qualities_strs and len(line) + 1 + len(qualities_strs[0]) < 60:
+            line += " " + qualities_strs.pop(0)
+        lines.append(line + "\n")
+    return "".join(lines)
+
+
+class FastqSolexaWriter(SequenceWriter):
+    r"""Write old style Solexa/Illumina FASTQ format files (with Solexa qualities) (OBSOLETE).
+
+    This outputs FASTQ files like those from the early Solexa/Illumina
+    pipeline, using Solexa scores and an ASCII offset of 64. These are
+    NOT compatible with the standard Sanger style PHRED FASTQ files.
+
+    If your records contain a "solexa_quality" entry under letter_annotations,
+    this is used, otherwise any "phred_quality" entry will be used after
+    conversion using the solexa_quality_from_phred function. If neither style
+    of quality scores are present, an exception is raised.
+
+    Although you can use this class directly, you are strongly encouraged
+    to use the ``as_fastq_solexa`` function, or top-level ``Bio.SeqIO.write()``
+    function instead.  For example, this code reads in a FASTQ file and re-saves
+    it as another FASTQ file:
+
+    >>> from Bio import SeqIO
+    >>> record_iterator = SeqIO.parse("Quality/solexa_example.fastq", "fastq-solexa")
+    >>> with open("Quality/temp.fastq", "w") as out_handle:
+    ...     SeqIO.write(record_iterator, out_handle, "fastq-solexa")
+    5
+
+    You might want to do this if the original file included extra line breaks,
+    which (while valid) may not be supported by all tools.  The output file
+    from Biopython will have each sequence on a single line, and each quality
+    string on a single line (which is considered desirable for maximum
+    compatibility).
+
+    This code is also called if you use the .format("fastq-solexa") method of
+    a SeqRecord. For example,
+
+    >>> record = SeqIO.read("Quality/sanger_faked.fastq", "fastq-sanger")
+    >>> print(record.format("fastq-solexa"))
+    @Test PHRED qualities from 40 to 0 inclusive
+    ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTN
+    +
+    hgfedcba`_^]\[ZYXWVUTSRQPONMLKJHGFECB@>:;
+    
+
+    Note that Solexa FASTQ files have an upper limit of Solexa quality 62, which is
+    encoded as ASCII 126, the tilde.  If your quality scores must be truncated to fit,
+    a warning is issued.
+
+    P.S. Don't forget to delete the temp file if you don't need it anymore:
+
+    >>> import os
+    >>> os.remove("Quality/temp.fastq")
+    """
+
+    def write_record(self, record):
+        """Write a single FASTQ record to the file."""
+        assert self._header_written
+        assert not self._footer_written
+        self._record_written = True
+
+        # TODO - Is an empty sequence allowed in FASTQ format?
+        seq = record.seq
+        if seq is None:
+            raise ValueError("No sequence for record %s" % record.id)
+        qualities_str = _get_solexa_quality_str(record)
+        if len(qualities_str) != len(seq):
+            raise ValueError(
+                "Record %s has sequence length %i but %i quality scores"
+                % (record.id, len(seq), len(qualities_str))
+            )
+
+        # FASTQ files can include a description, just like FASTA files
+        # (at least, this is what the NCBI Short Read Archive does)
+        id = self.clean(record.id)
+        description = self.clean(record.description)
+        if description and description.split(None, 1)[0] == id:
+            # The description includes the id at the start
+            title = description
+        elif description:
+            title = "%s %s" % (id, description)
+        else:
+            title = id
+
+        self.handle.write("@%s\n%s\n+\n%s\n" % (title, seq, qualities_str))
+
+
+def as_fastq_solexa(record):
+    """Turn a SeqRecord into a Solexa FASTQ formatted string.
+
+    This is used internally by the SeqRecord's .format("fastq-solexa")
+    method and by the SeqIO.write(..., ..., "fastq-solexa") function.
+    """
+    seq_str = _get_seq_string(record)
+    qualities_str = _get_solexa_quality_str(record)
+    if len(qualities_str) != len(seq_str):
+        raise ValueError(
+            "Record %s has sequence length %i but %i quality scores"
+            % (record.id, len(seq_str), len(qualities_str))
+        )
+    id = _clean(record.id)
+    description = _clean(record.description)
+    if description and description.split(None, 1)[0] == id:
+        # The description includes the id at the start
+        title = description
+    elif description:
+        title = "%s %s" % (id, description)
+    else:
+        title = id
+    return "@%s\n%s\n+\n%s\n" % (title, seq_str, qualities_str)
+
+
+class FastqIlluminaWriter(SequenceWriter):
+    r"""Write Illumina 1.3+ FASTQ format files (with PHRED quality scores) (OBSOLETE).
+
+    This outputs FASTQ files like those from the Solexa/Illumina 1.3+ pipeline,
+    using PHRED scores and an ASCII offset of 64. Note these files are NOT
+    compatible with the standard Sanger style PHRED FASTQ files which use an
+    ASCII offset of 33.
+
+    Although you can use this class directly, you are strongly encouraged to
+    use the ``as_fastq_illumina`` or top-level ``Bio.SeqIO.write()`` function
+    with format name "fastq-illumina" instead. This code is also called if you
+    use the .format("fastq-illumina") method of a SeqRecord. For example,
+
+    >>> from Bio import SeqIO
+    >>> record = SeqIO.read("Quality/sanger_faked.fastq", "fastq-sanger")
+    >>> print(record.format("fastq-illumina"))
+    @Test PHRED qualities from 40 to 0 inclusive
+    ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTN
+    +
+    hgfedcba`_^]\[ZYXWVUTSRQPONMLKJIHGFEDCBA@
+    
+
+    Note that Illumina FASTQ files have an upper limit of PHRED quality 62, which is
+    encoded as ASCII 126, the tilde. If your quality scores are truncated to fit, a
+    warning is issued.
+    """
+
+    def write_record(self, record):
+        """Write a single FASTQ record to the file."""
+        assert self._header_written
+        assert not self._footer_written
+        self._record_written = True
+
+        # TODO - Is an empty sequence allowed in FASTQ format?
+        seq = record.seq
+        if seq is None:
+            raise ValueError("No sequence for record %s" % record.id)
+        qualities_str = _get_illumina_quality_str(record)
+        if len(qualities_str) != len(seq):
+            raise ValueError(
+                "Record %s has sequence length %i but %i quality scores"
+                % (record.id, len(seq), len(qualities_str))
+            )
+
+        # FASTQ files can include a description, just like FASTA files
+        # (at least, this is what the NCBI Short Read Archive does)
+        id = self.clean(record.id)
+        description = self.clean(record.description)
+        if description and description.split(None, 1)[0] == id:
+            # The description includes the id at the start
+            title = description
+        elif description:
+            title = "%s %s" % (id, description)
+        else:
+            title = id
+
+        self.handle.write("@%s\n%s\n+\n%s\n" % (title, seq, qualities_str))
+
+
+def as_fastq_illumina(record):
+    """Turn a SeqRecord into an Illumina FASTQ formatted string.
+
+    This is used internally by the SeqRecord's .format("fastq-illumina")
+    method and by the SeqIO.write(..., ..., "fastq-illumina") function.
+    """
+    seq_str = _get_seq_string(record)
+    qualities_str = _get_illumina_quality_str(record)
+    if len(qualities_str) != len(seq_str):
+        raise ValueError(
+            "Record %s has sequence length %i but %i quality scores"
+            % (record.id, len(seq_str), len(qualities_str))
+        )
+    id = _clean(record.id)
+    description = _clean(record.description)
+    if description and description.split(None, 1)[0] == id:
+        title = description
+    elif description:
+        title = "%s %s" % (id, description)
+    else:
+        title = id
+    return "@%s\n%s\n+\n%s\n" % (title, seq_str, qualities_str)
+
+
+def PairedFastaQualIterator(fasta_source, qual_source, alphabet=None, title2ids=None):
+    """Iterate over matched FASTA and QUAL files as SeqRecord objects.
+
+    For example, consider this short QUAL file with PHRED quality scores::
+
+        >EAS54_6_R1_2_1_413_324
+        26 26 18 26 26 26 26 26 26 26 26 26 26 26 26 22 26 26 26 26
+        26 26 26 23 23
+        >EAS54_6_R1_2_1_540_792
+        26 26 26 26 26 26 26 26 26 26 26 22 26 26 26 26 26 12 26 26
+        26 18 26 23 18
+        >EAS54_6_R1_2_1_443_348
+        26 26 26 26 26 26 26 26 26 26 26 24 26 22 26 26 13 22 26 18
+        24 18 18 18 18
+
+    And a matching FASTA file::
+
+        >EAS54_6_R1_2_1_413_324
+        CCCTTCTTGTCTTCAGCGTTTCTCC
+        >EAS54_6_R1_2_1_540_792
+        TTGGCAGGCCAAGGCCGATGGATCA
+        >EAS54_6_R1_2_1_443_348
+        GTTGCTTCTGGCGTGGGTGGGGGGG
+
+    You can parse these separately using Bio.SeqIO with the "qual" and
+    "fasta" formats, but then you'll get a group of SeqRecord objects with
+    no sequence, and a matching group with the sequence but not the
+    qualities.  Because it only deals with one input file handle, Bio.SeqIO
+    can't be used to read the two files together - but this function can!
+    For example,
+
+    >>> with open("Quality/example.fasta") as f:
+    ...     with open("Quality/example.qual") as q:
+    ...         for record in PairedFastaQualIterator(f, q):
+    ...             print("%s %s" % (record.id, record.seq))
+    ...
+    EAS54_6_R1_2_1_413_324 CCCTTCTTGTCTTCAGCGTTTCTCC
+    EAS54_6_R1_2_1_540_792 TTGGCAGGCCAAGGCCGATGGATCA
+    EAS54_6_R1_2_1_443_348 GTTGCTTCTGGCGTGGGTGGGGGGG
+
+    As with the FASTQ or QUAL parsers, if you want to look at the qualities,
+    they are in each record's per-letter-annotation dictionary as a simple
+    list of integers:
+
+    >>> print(record.letter_annotations["phred_quality"])
+    [26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 24, 26, 22, 26, 26, 13, 22, 26, 18, 24, 18, 18, 18, 18]
+
+    If you have access to data as a FASTQ format file, using that directly
+    would be simpler and more straightforward.  Note that you can easily use
+    this function to convert paired FASTA and QUAL files into FASTQ files:
+
+    >>> from Bio import SeqIO
+    >>> with open("Quality/example.fasta") as f:
+    ...     with open("Quality/example.qual") as q:
+    ...         SeqIO.write(PairedFastaQualIterator(f, q), "Quality/temp.fastq", "fastq")
+    ...
+    3
+
+    And don't forget to clean up the temp file if you don't need it anymore:
+
+    >>> import os
+    >>> os.remove("Quality/temp.fastq")
+    """
+    if alphabet is not None:
+        raise ValueError("The alphabet argument is no longer supported")
+
+    from Bio.SeqIO.FastaIO import FastaIterator
+
+    fasta_iter = FastaIterator(fasta_source, title2ids=title2ids)
+    qual_iter = QualPhredIterator(qual_source, title2ids=title2ids)
+
+    # Using zip would not load everything into memory, but it also would not
+    # catch any extra records found in only one of the two files.
+    while True:
+        try:
+            f_rec = next(fasta_iter)
+        except StopIteration:
+            f_rec = None
+        try:
+            q_rec = next(qual_iter)
+        except StopIteration:
+            q_rec = None
+        if f_rec is None and q_rec is None:
+            # End of both files
+            break
+        if f_rec is None:
+            raise ValueError("FASTA file has more entries than the QUAL file.")
+        if q_rec is None:
+            raise ValueError("QUAL file has more entries than the FASTA file.")
+        if f_rec.id != q_rec.id:
+            raise ValueError(
+                "FASTA and QUAL entries do not match (%s vs %s)." % (f_rec.id, q_rec.id)
+            )
+        if len(f_rec) != len(q_rec.letter_annotations["phred_quality"]):
+            raise ValueError(
+                "Sequence length and number of quality scores disagree for %s"
+                % f_rec.id
+            )
+        # Merge the data....
+        f_rec.letter_annotations["phred_quality"] = q_rec.letter_annotations[
+            "phred_quality"
+        ]
+        yield f_rec
+    # Done
+
+
+def _fastq_generic(in_file, out_file, mapping):
+    """FASTQ helper function where can't have data loss by truncation (PRIVATE)."""
+    # For real speed, don't even make SeqRecord and Seq objects!
+    count = 0
+    null = chr(0)
+    with as_handle(out_file, "w") as out_handle:
+        for title, seq, old_qual in FastqGeneralIterator(in_file):
+            count += 1
+            # map the qual...
+            qual = old_qual.translate(mapping)
+            if null in qual:
+                raise ValueError("Invalid character in quality string")
+            out_handle.write("@%s\n%s\n+\n%s\n" % (title, seq, qual))
+    return count
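+
+
+# A minimal illustrative sketch (not part of the API) of how the 256-character
+# translation tables below work with str.translate: each input character's
+# ordinal indexes into the table, so valid quality characters map to themselves
+# (or a re-encoded character) while everything else maps to chr(0), which the
+# helpers above treat as an error. Using the Sanger-to-Sanger table built in
+# _fastq_sanger_convert_fastq_sanger below:
+#
+#     mapping = "".join(
+#         [chr(0) for _ in range(0, 33)]
+#         + [chr(c) for c in range(33, 127)]
+#         + [chr(0) for _ in range(127, 256)]
+#     )
+#     assert "IIII".translate(mapping) == "IIII"   # valid Sanger qualities
+#     assert chr(0) in "\tIII".translate(mapping)  # tab is flagged as invalid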
+
+
+def _fastq_generic2(in_file, out_file, mapping, truncate_char, truncate_msg):
+    """FASTQ helper function where there could be data loss by truncation (PRIVATE)."""
+    # For real speed, don't even make SeqRecord and Seq objects!
+    count = 0
+    null = chr(0)
+    with as_handle(out_file, "w") as out_handle:
+        for title, seq, old_qual in FastqGeneralIterator(in_file):
+            count += 1
+            # map the qual...
+            qual = old_qual.translate(mapping)
+            if null in qual:
+                raise ValueError("Invalid character in quality string")
+            if truncate_char in qual:
+                qual = qual.replace(truncate_char, chr(126))
+                warnings.warn(truncate_msg, BiopythonWarning)
+            out_handle.write("@%s\n%s\n+\n%s\n" % (title, seq, qual))
+    return count
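+
+
+# Illustrative worked example of the truncation handled by _fastq_generic2:
+# in the Sanger to Illumina 1.3+ table further below, a Sanger "~" (PHRED 93)
+# falls in the 96-126 block mapped to trunc_char, and is then rewritten as
+# chr(126) == "~", i.e. only quality 62 in the offset-64 encoding - hence
+# the data loss warning.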
+
+
+def _fastq_sanger_convert_fastq_sanger(in_file, out_file):
+    """Fast Sanger FASTQ to Sanger FASTQ conversion (PRIVATE).
+
+    Useful for removing line wrapping and the redundant second identifier
+    on the plus lines. Will also check that the quality string is valid.
+
+    Avoids creating SeqRecord and Seq objects in order to speed up this
+    conversion.
+    """
+    # Map unexpected chars to null
+    mapping = "".join(
+        [chr(0) for ascii in range(0, 33)]
+        + [chr(ascii) for ascii in range(33, 127)]
+        + [chr(0) for ascii in range(127, 256)]
+    )
+    assert len(mapping) == 256
+    return _fastq_generic(in_file, out_file, mapping)
+
+
+def _fastq_solexa_convert_fastq_solexa(in_file, out_file):
+    """Fast Solexa FASTQ to Solexa FASTQ conversion (PRIVATE).
+
+    Useful for removing line wrapping and the redundant second identifier
+    on the plus lines. Will also check that the quality string is valid.
+    Avoids creating SeqRecord and Seq objects in order to speed up this
+    conversion.
+    """
+    # Map unexpected chars to null
+    mapping = "".join(
+        [chr(0) for ascii in range(0, 59)]
+        + [chr(ascii) for ascii in range(59, 127)]
+        + [chr(0) for ascii in range(127, 256)]
+    )
+    assert len(mapping) == 256
+    return _fastq_generic(in_file, out_file, mapping)
+
+
+def _fastq_illumina_convert_fastq_illumina(in_file, out_file):
+    """Fast Illumina 1.3+ FASTQ to Illumina 1.3+ FASTQ conversion (PRIVATE).
+
+    Useful for removing line wrapping and the redundant second identifier
+    on the plus lines. Will also check that the quality string is valid.
+    Avoids creating SeqRecord and Seq objects in order to speed up this
+    conversion.
+    """
+    # Map unexpected chars to null
+    mapping = "".join(
+        [chr(0) for ascii in range(0, 64)]
+        + [chr(ascii) for ascii in range(64, 127)]
+        + [chr(0) for ascii in range(127, 256)]
+    )
+    assert len(mapping) == 256
+    return _fastq_generic(in_file, out_file, mapping)
+
+
+def _fastq_illumina_convert_fastq_sanger(in_file, out_file):
+    """Fast Illumina 1.3+ FASTQ to Sanger FASTQ conversion (PRIVATE).
+
+    Avoids creating SeqRecord and Seq objects in order to speed up this
+    conversion.
+    """
+    # Map unexpected chars to null
+    mapping = "".join(
+        [chr(0) for ascii in range(0, 64)]
+        + [chr(33 + q) for q in range(0, 62 + 1)]
+        + [chr(0) for ascii in range(127, 256)]
+    )
+    assert len(mapping) == 256
+    return _fastq_generic(in_file, out_file, mapping)
+
+
+def _fastq_sanger_convert_fastq_illumina(in_file, out_file):
+    """Fast Sanger FASTQ to Illumina 1.3+ FASTQ conversion (PRIVATE).
+
+    Avoids creating SeqRecord and Seq objects in order to speed up this
+    conversion. Will issue a warning if the scores had to be truncated at 62
+    (maximum possible in the Illumina 1.3+ FASTQ format)
+    """
+    # Map unexpected chars to null
+    trunc_char = chr(1)
+    mapping = "".join(
+        [chr(0) for ascii in range(0, 33)]
+        + [chr(64 + q) for q in range(0, 62 + 1)]
+        + [trunc_char for ascii in range(96, 127)]
+        + [chr(0) for ascii in range(127, 256)]
+    )
+    assert len(mapping) == 256
+    return _fastq_generic2(
+        in_file,
+        out_file,
+        mapping,
+        trunc_char,
+        "Data loss - max PHRED quality 62 in Illumina 1.3+ FASTQ",
+    )
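+
+
+# Worked example (illustrative): Sanger encodes PHRED 40 as chr(40 + 33) == "I"
+# while Illumina 1.3+ uses chr(40 + 64) == "h", so the two tables above shift
+# the printable quality characters by 64 - 33 == 31 in either direction:
+#
+#     # with mapping as built in _fastq_sanger_convert_fastq_illumina:
+#     assert "I".translate(mapping) == "h"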
+
+
+def _fastq_solexa_convert_fastq_sanger(in_file, out_file):
+    """Fast Solexa FASTQ to Sanger FASTQ conversion (PRIVATE).
+
+    Avoids creating SeqRecord and Seq objects in order to speed up this
+    conversion.
+    """
+    # Map unexpected chars to null
+    mapping = "".join(
+        [chr(0) for ascii in range(0, 59)]
+        + [
+            chr(33 + int(round(phred_quality_from_solexa(q))))
+            for q in range(-5, 62 + 1)
+        ]
+        + [chr(0) for ascii in range(127, 256)]
+    )
+    assert len(mapping) == 256
+    return _fastq_generic(in_file, out_file, mapping)
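+
+
+# Worked example (illustrative): the lowest Solexa score is -5, which converts
+# to PHRED 10 * log10(10 ** (-5 / 10) + 1) ~= 1.19; this rounds to 1 and is
+# written as chr(33 + 1) == '"' in the Sanger encoding.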
+
+
+def _fastq_sanger_convert_fastq_solexa(in_file, out_file):
+    """Fast Sanger FASTQ to Solexa FASTQ conversion (PRIVATE).
+
+    Avoids creating SeqRecord and Seq objects in order to speed up this
+    conversion. Will issue a warning if the scores had to be truncated at 62
+    (maximum possible in the Solexa FASTQ format)
+    """
+    # Map unexpected chars to null
+    trunc_char = chr(1)
+    mapping = "".join(
+        [chr(0) for ascii in range(0, 33)]
+        + [chr(64 + int(round(solexa_quality_from_phred(q)))) for q in range(0, 62 + 1)]
+        + [trunc_char for ascii in range(96, 127)]
+        + [chr(0) for ascii in range(127, 256)]
+    )
+    assert len(mapping) == 256
+    return _fastq_generic2(
+        in_file,
+        out_file,
+        mapping,
+        trunc_char,
+        "Data loss - max Solexa quality 62 in Solexa FASTQ",
+    )
+
+
+def _fastq_solexa_convert_fastq_illumina(in_file, out_file):
+    """Fast Solexa FASTQ to Illumina 1.3+ FASTQ conversion (PRIVATE).
+
+    Avoids creating SeqRecord and Seq objects in order to speed up this
+    conversion.
+    """
+    # Map unexpected chars to null
+    mapping = "".join(
+        [chr(0) for ascii in range(0, 59)]
+        + [
+            chr(64 + int(round(phred_quality_from_solexa(q))))
+            for q in range(-5, 62 + 1)
+        ]
+        + [chr(0) for ascii in range(127, 256)]
+    )
+    assert len(mapping) == 256
+    return _fastq_generic(in_file, out_file, mapping)
+
+
+def _fastq_illumina_convert_fastq_solexa(in_file, out_file):
+    """Fast Illumina 1.3+ FASTQ to Solexa FASTQ conversion (PRIVATE).
+
+    Avoids creating SeqRecord and Seq objects in order to speed up this
+    conversion.
+    """
+    # Map unexpected chars to null
+    mapping = "".join(
+        [chr(0) for ascii in range(0, 64)]
+        + [chr(64 + int(round(solexa_quality_from_phred(q)))) for q in range(0, 62 + 1)]
+        + [chr(0) for ascii in range(127, 256)]
+    )
+    assert len(mapping) == 256
+    return _fastq_generic(in_file, out_file, mapping)
+
+
+def _fastq_convert_fasta(in_file, out_file):
+    """Fast FASTQ to FASTA conversion (PRIVATE).
+
+    Avoids dealing with the FASTQ quality encoding, and creating SeqRecord and
+    Seq objects in order to speed up this conversion.
+
+    NOTE - This does NOT check that the characters used in the FASTQ quality
+    string are valid!
+    """
+    # For real speed, don't even make SeqRecord and Seq objects!
+    count = 0
+    with as_handle(out_file, "w") as out_handle:
+        for title, seq, qual in FastqGeneralIterator(in_file):
+            count += 1
+            out_handle.write(">%s\n" % title)
+            # Do line wrapping
+            for i in range(0, len(seq), 60):
+                out_handle.write(seq[i : i + 60] + "\n")
+    return count
+
+
+def _fastq_convert_tab(in_file, out_file):
+    """Fast FASTQ to simple tabbed conversion (PRIVATE).
+
+    Avoids dealing with the FASTQ quality encoding, and creating SeqRecord and
+    Seq objects in order to speed up this conversion.
+
+    NOTE - This does NOT check that the characters used in the FASTQ quality
+    string are valid!
+    """
+    # For real speed, don't even make SeqRecord and Seq objects!
+    count = 0
+    with as_handle(out_file, "w") as out_handle:
+        for title, seq, qual in FastqGeneralIterator(in_file):
+            count += 1
+            out_handle.write("%s\t%s\n" % (title.split(None, 1)[0], seq))
+    return count
+
+
+def _fastq_convert_qual(in_file, out_file, mapping):
+    """FASTQ helper function for QUAL output (PRIVATE).
+
+    Mapping should be a dictionary mapping expected ASCII characters from the
+    FASTQ quality string to PHRED quality scores (as strings).
+    """
+    # For real speed, don't even make SeqRecord and Seq objects!
+    count = 0
+    with as_handle(out_file, "w") as out_handle:
+        for title, seq, qual in FastqGeneralIterator(in_file):
+            count += 1
+            out_handle.write(">%s\n" % title)
+            # map the qual... note even with Sanger encoding max 2 digits
+            try:
+                qualities_strs = [mapping[ascii] for ascii in qual]
+            except KeyError:
+                raise ValueError("Invalid character in quality string") from None
+            data = " ".join(qualities_strs)
+            while len(data) > 60:
+                # Know quality scores are either 1 or 2 digits, so there
+                # must be a space in any three consecutive characters.
+                if data[60] == " ":
+                    out_handle.write(data[:60] + "\n")
+                    data = data[61:]
+                elif data[59] == " ":
+                    out_handle.write(data[:59] + "\n")
+                    data = data[60:]
+                else:
+                    assert data[58] == " ", "Internal logic failure in wrapping"
+                    out_handle.write(data[:58] + "\n")
+                    data = data[59:]
+            out_handle.write(data + "\n")
+    return count
+
+
+def _fastq_sanger_convert_qual(in_file, out_file):
+    """Fast Sanger FASTQ to QUAL conversion (PRIVATE)."""
+    mapping = {chr(q + 33): str(q) for q in range(0, 93 + 1)}
+    return _fastq_convert_qual(in_file, out_file, mapping)
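+
+
+# For example (illustrative), under the Sanger table above PHRED 40 is stored
+# as chr(40 + 33) == "I", so mapping["I"] == "40" and a FASTQ quality string
+# of "IIII" becomes "40 40 40 40" in the QUAL output.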
+
+
+def _fastq_solexa_convert_qual(in_file, out_file):
+    """Fast Solexa FASTQ to QUAL conversion (PRIVATE)."""
+    mapping = {
+        chr(q + 64): str(int(round(phred_quality_from_solexa(q))))
+        for q in range(-5, 62 + 1)
+    }
+    return _fastq_convert_qual(in_file, out_file, mapping)
+
+
+def _fastq_illumina_convert_qual(in_file, out_file):
+    """Fast Illumina 1.3+ FASTQ to QUAL conversion (PRIVATE)."""
+    mapping = {chr(q + 64): str(q) for q in range(0, 62 + 1)}
+    return _fastq_convert_qual(in_file, out_file, mapping)
+
+
+if __name__ == "__main__":
+    from Bio._utils import run_doctest
+
+    run_doctest(verbose=0)
diff --git a/code/lib/Bio/SeqIO/SeqXmlIO.py b/code/lib/Bio/SeqIO/SeqXmlIO.py
new file mode 100644
index 0000000..c4d15f6
--- /dev/null
+++ b/code/lib/Bio/SeqIO/SeqXmlIO.py
@@ -0,0 +1,669 @@
+# Copyright 2010 by Thomas Schmitt.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Bio.SeqIO support for the "seqxml" file format, SeqXML.
+
+This module is for reading and writing SeqXML format files as
+SeqRecord objects, and is expected to be used via the Bio.SeqIO API.
+
+SeqXML is a lightweight XML format which is intended as an alternative to
+FASTA files. For more information see http://www.seqXML.org and Schmitt et al
+(2011), https://doi.org/10.1093/bib/bbr025
+"""
+from xml import sax
+from xml.sax import handler
+from xml.sax.saxutils import XMLGenerator
+from xml.sax.xmlreader import AttributesImpl
+
+from Bio.Seq import Seq
+from Bio.Seq import UnknownSeq
+from Bio.SeqRecord import SeqRecord
+
+from .Interfaces import SequenceIterator
+from .Interfaces import SequenceWriter
+
+
+class ContentHandler(handler.ContentHandler):
+    """Handles XML events generated by the parser (PRIVATE)."""
+
+    def __init__(self):
+        """Create a handler to handle XML events."""
+        super().__init__()
+        self.source = None
+        self.sourceVersion = None
+        self.seqXMLversion = None
+        self.ncbiTaxID = None
+        self.speciesName = None
+        self.startElementNS = None
+        self.data = None
+        self.records = []
+
+    def startDocument(self):
+        """Set XML handlers when an XML declaration is found."""
+        self.startElementNS = self.startSeqXMLElement
+
+    def startSeqXMLElement(self, name, qname, attrs):
+        """Handle start of a seqXML element."""
+        if name != (None, "seqXML"):
+            raise ValueError("Failed to find the start of seqXML element")
+        if qname is not None:
+            raise RuntimeError("Unexpected qname for seqXML element")
+        schema = None
+        for key, value in attrs.items():
+            namespace, localname = key
+            if namespace is None:
+                if localname == "source":
+                    self.source = value
+                elif localname == "sourceVersion":
+                    self.sourceVersion = value
+                elif localname == "seqXMLversion":
+                    self.seqXMLversion = value
+                elif localname == "ncbiTaxID":
+                    # check if it is an integer, but store as string
+                    number = int(value)
+                    self.ncbiTaxID = value
+                elif localname == "speciesName":
+                    self.speciesName = value
+                else:
+                    raise ValueError("Unexpected attribute for XML Schema")
+            elif namespace == "http://www.w3.org/2001/XMLSchema-instance":
+                if localname == "noNamespaceSchemaLocation":
+                    schema = value
+                else:
+                    raise ValueError("Unexpected attribute for XML Schema in namespace")
+            else:
+                raise ValueError(
+                    "Unexpected namespace '%s' for seqXML attribute" % namespace
+                )
+        if self.seqXMLversion is None:
+            raise ValueError("Failed to find seqXMLversion")
+        url = "http://www.seqxml.org/%s/seqxml.xsd" % self.seqXMLversion
+        if schema != url:
+            raise ValueError(
+                "XML Schema '%s' found not consistent with reported seqXML version %s"
+                % (schema, self.seqXMLversion)
+            )
+        self.endElementNS = self.endSeqXMLElement
+        self.startElementNS = self.startEntryElement
+
+    def endSeqXMLElement(self, name, qname):
+        """Handle end of the seqXML element."""
+        namespace, localname = name
+        if namespace is not None:
+            raise RuntimeError("Unexpected namespace '%s' for seqXML end" % namespace)
+        if qname is not None:
+            raise RuntimeError("Unexpected qname '%s' for seqXML end" % qname)
+        if localname != "seqXML":
+            raise RuntimeError("Failed to find end of seqXML element")
+        self.startElementNS = None
+        self.endElementNS = None
+
+    def startEntryElement(self, name, qname, attrs):
+        """Set new entry with id and the optional entry source (PRIVATE)."""
+        if name != (None, "entry"):
+            raise ValueError("Expected to find the start of an entry element")
+        if qname is not None:
+            raise RuntimeError("Unexpected qname for entry element")
+        record = SeqRecord("", id=None)
+        if self.speciesName is not None:
+            record.annotations["organism"] = self.speciesName
+        if self.ncbiTaxID is not None:
+            record.annotations["ncbi_taxid"] = self.ncbiTaxID
+        record.annotations["source"] = self.source
+        for key, value in attrs.items():
+            namespace, localname = key
+            if namespace is None:
+                if localname == "id":
+                    record.id = value
+                elif localname == "source":
+                    record.annotations["source"] = value
+                else:
+                    raise ValueError(
+                        "Unexpected attribute %s in entry element" % localname
+                    )
+            else:
+                raise ValueError(
+                    "Unexpected namespace '%s' for entry attribute" % namespace
+                )
+        if record.id is None:
+            raise ValueError("Failed to find entry ID")
+        self.records.append(record)
+        self.startElementNS = self.startEntryFieldElement
+        self.endElementNS = self.endEntryElement
+
+    def endEntryElement(self, name, qname):
+        """Handle end of an entry element."""
+        if name != (None, "entry"):
+            raise ValueError("Expected to find the end of an entry element")
+        if qname is not None:
+            raise RuntimeError("Unexpected qname for entry element")
+        self.startElementNS = self.startEntryElement
+        self.endElementNS = self.endSeqXMLElement
+
+    def startEntryFieldElement(self, name, qname, attrs):
+        """Receive a field of an entry element and forward it."""
+        namespace, localname = name
+        if namespace is not None:
+            raise ValueError(
+                "Unexpected namespace '%s' for %s element" % (namespace, localname)
+            )
+        if qname is not None:
+            raise RuntimeError(
+                "Unexpected qname '%s' for %s element" % (qname, localname)
+            )
+        if localname == "species":
+            return self.startSpeciesElement(attrs)
+        if localname == "description":
+            return self.startDescriptionElement(attrs)
+        if localname in ("DNAseq", "RNAseq", "AAseq"):
+            return self.startSequenceElement(attrs)
+        if localname == "DBRef":
+            return self.startDBRefElement(attrs)
+        if localname == "property":
+            return self.startPropertyElement(attrs)
+        raise ValueError("Unexpected field %s in entry" % localname)
+
+    def startSpeciesElement(self, attrs):
+        """Parse the species information."""
+        name = None
+        ncbiTaxID = None
+        for key, value in attrs.items():
+            namespace, localname = key
+            if namespace is None:
+                if localname == "name":
+                    name = value
+                elif localname == "ncbiTaxID":
+                    # check if it is an integer, but store as string
+                    number = int(value)
+                    ncbiTaxID = value
+                else:
+                    raise ValueError(
+                        "Unexpected attribute '%s' found in species tag" % key
+                    )
+            else:
+                raise ValueError(
+                    "Unexpected namespace '%s' for species attribute" % namespace
+                )
+        # The attributes "name" and "ncbiTaxID" are required:
+        if name is None:
+            raise ValueError("Failed to find species name")
+        if ncbiTaxID is None:
+            raise ValueError("Failed to find ncbiTaxId")
+        record = self.records[-1]
+        # The keywords for the species annotation are taken from SwissIO
+        record.annotations["organism"] = name
+        # TODO - Should have been a list to match SwissProt parser:
+        record.annotations["ncbi_taxid"] = ncbiTaxID
+        self.endElementNS = self.endSpeciesElement
+
+    def endSpeciesElement(self, name, qname):
+        """Handle end of a species element."""
+        namespace, localname = name
+        if namespace is not None:
+            raise RuntimeError("Unexpected namespace '%s' for species end" % namespace)
+        if qname is not None:
+            raise RuntimeError("Unexpected qname '%s' for species end" % qname)
+        if localname != "species":
+            raise RuntimeError("Failed to find end of species element")
+        self.endElementNS = self.endEntryElement
+
+    def startDescriptionElement(self, attrs):
+        """Parse the description."""
+        if attrs:
+            raise ValueError("Unexpected attributes found in description element")
+        if self.data is not None:
+            raise RuntimeError("Unexpected data found: '%s'" % self.data)
+        self.data = ""
+        self.endElementNS = self.endDescriptionElement
+
+    def endDescriptionElement(self, name, qname):
+        """Handle the end of a description element."""
+        namespace, localname = name
+        if namespace is not None:
+            raise RuntimeError(
+                "Unexpected namespace '%s' for description end" % namespace
+            )
+        if qname is not None:
+            raise RuntimeError("Unexpected qname '%s' for description end" % qname)
+        if localname != "description":
+            raise RuntimeError("Failed to find end of description element")
+        record = self.records[-1]
+        description = self.data
+        if description:  # ignore if empty string
+            record.description = description
+        self.data = None
+        self.endElementNS = self.endEntryElement
+
+    def startSequenceElement(self, attrs):
+        """Parse DNA, RNA, or protein sequence."""
+        if attrs:
+            raise ValueError("Unexpected attributes found in sequence element")
+        if self.data is not None:
+            raise RuntimeError("Unexpected data found: '%s'" % self.data)
+        self.data = ""
+        self.endElementNS = self.endSequenceElement
+
+    def endSequenceElement(self, name, qname):
+        """Handle the end of a sequence element."""
+        namespace, localname = name
+        if namespace is not None:
+            raise RuntimeError("Unexpected namespace '%s' for sequence end" % namespace)
+        if qname is not None:
+            raise RuntimeError("Unexpected qname '%s' for sequence end" % qname)
+        record = self.records[-1]
+        if localname == "DNAseq":
+            record.annotations["molecule_type"] = "DNA"
+        elif localname == "RNAseq":
+            record.annotations["molecule_type"] = "RNA"
+        elif localname == "AAseq":
+            record.annotations["molecule_type"] = "protein"
+        else:
+            raise RuntimeError(
+                "Failed to find end of sequence (localname = %s)" % localname
+            )
+        record.seq = Seq(self.data)
+        self.data = None
+        self.endElementNS = self.endEntryElement
+
+    def startDBRefElement(self, attrs):
+        """Parse a database cross reference."""
+        source = None
+        ID = None
+        for key, value in attrs.items():
+            namespace, localname = key
+            if namespace is None:
+                if localname == "source":
+                    source = value
+                elif localname == "id":
+                    ID = value
+                else:
+                    raise ValueError(
+                        "Unexpected attribute '%s' found for DBRef element" % key
+                    )
+            else:
+                raise ValueError(
+                    "Unexpected namespace '%s' for DBRef attribute" % namespace
+                )
+        # The attributes "source" and "id" are required:
+        if source is None:
+            raise ValueError("Failed to find source for DBRef element")
+        if ID is None:
+            raise ValueError("Failed to find id for DBRef element")
+        if self.data is not None:
+            raise RuntimeError("Unexpected data found: '%s'" % self.data)
+        self.data = ""
+        record = self.records[-1]
+        dbxref = "%s:%s" % (source, ID)
+        if dbxref not in record.dbxrefs:
+            record.dbxrefs.append(dbxref)
+        self.endElementNS = self.endDBRefElement
+
+    def endDBRefElement(self, name, qname):
+        """Handle the end of a DBRef element."""
+        namespace, localname = name
+        if namespace is not None:
+            raise RuntimeError(
+                "Unexpected namespace '%s' for DBRef element" % namespace
+            )
+        if qname is not None:
+            raise RuntimeError("Unexpected qname '%s' for DBRef element" % qname)
+        if localname != "DBRef":
+            raise RuntimeError(
+                "Unexpected localname '%s' for DBRef element" % localname
+            )
+        if self.data:
+            raise RuntimeError(
+                "Unexpected data received for DBRef element: '%s'" % self.data
+            )
+        self.data = None
+        self.endElementNS = self.endEntryElement
+
+    def startPropertyElement(self, attrs):
+        """Handle the start of a property element."""
+        property_name = None
+        property_value = None
+        for key, value in attrs.items():
+            namespace, localname = key
+            if namespace is None:
+                if localname == "name":
+                    property_name = value
+                elif localname == "value":
+                    property_value = value
+                else:
+                    raise ValueError(
+                        "Unexpected attribute '%s' found for property element" % key
+                    )
+            else:
+                raise ValueError(
+                    "Unexpected namespace '%s' for property attribute" % namespace
+                )
+        # The attribute "name" is required:
+        if property_name is None:
+            raise ValueError("Failed to find name for property element")
+        record = self.records[-1]
+        if property_name == "molecule_type":
+            # At this point, record.annotations["molecule_type"] is either
+            # "DNA", "RNA", or "protein"; property_value may be a more detailed
+            # description such as "mRNA" or "genomic DNA".
+            assert record.annotations[property_name] in property_value
+            record.annotations[property_name] = property_value
+        else:
+            if property_name not in record.annotations:
+                record.annotations[property_name] = []
+            record.annotations[property_name].append(property_value)
+        self.endElementNS = self.endPropertyElement
+
+    def endPropertyElement(self, name, qname):
+        """Handle the end of a property element."""
+        namespace, localname = name
+        if namespace is not None:
+            raise RuntimeError(
+                "Unexpected namespace '%s' for property element" % namespace
+            )
+        if qname is not None:
+            raise RuntimeError("Unexpected qname '%s' for property element" % qname)
+        if localname != "property":
+            raise RuntimeError(
+                "Unexpected localname '%s' for property element" % localname
+            )
+        self.endElementNS = self.endEntryElement
+
+    def characters(self, data):
+        """Handle character data."""
+        if self.data is not None:
+            self.data += data
+
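+
+# A minimal sketch (assuming "example.xml" is a SeqXML file, a hypothetical
+# name used only for illustration) of the incremental SAX pattern this handler
+# is written for; SeqXmlIterator below wraps exactly this loop:
+#
+#     from xml import sax
+#     from xml.sax import handler as sax_handler
+#
+#     parser = sax.make_parser()
+#     parser.setContentHandler(ContentHandler())
+#     parser.setFeature(sax_handler.feature_namespaces, True)
+#     with open("example.xml", "rb") as stream:
+#         for block in iter(lambda: stream.read(1024), b""):
+#             parser.feed(block)
+#     parser.close()
+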
+
+class SeqXmlIterator(SequenceIterator):
+    """Parser for seqXML files.
+
+    Parses seqXML files and creates SeqRecords.
+    Assumes the seqXML file is valid; please validate beforehand.
+    It is assumed that all information for one record can be found within a
+    record element or above. Two types of methods are called when the start
+    tag of an element is reached. To receive only the attributes of an
+    element before its end tag is reached implement _attr_TAGNAME.
+    To get an element and its children as a DOM tree implement _elem_TAGNAME.
+    Everything that is part of the DOM tree will not trigger any further
+    method calls.
+    """
+
+    BLOCK = 1024
+
+    def __init__(self, stream_or_path, namespace=None):
+        """Create the object and initialize the XML parser."""
+        # Make sure we got a binary handle. If we got a text handle, then
+        # the parser will still run but unicode characters will be garbled
+        # if the text handle was opened with a different encoding than the
+        # one specified in the XML file. With a binary handle, the correct
+        # encoding is picked up by the parser from the XML file.
+        self.parser = sax.make_parser()
+        content_handler = ContentHandler()
+        self.parser.setContentHandler(content_handler)
+        self.parser.setFeature(handler.feature_namespaces, True)
+        super().__init__(stream_or_path, mode="b", fmt="SeqXML")
+
+    def parse(self, handle):
+        """Start parsing the file, and return a SeqRecord generator."""
+        parser = self.parser
+        content_handler = parser.getContentHandler()
+        BLOCK = self.BLOCK
+        while True:
+            # Read in another block of the file...
+            text = handle.read(BLOCK)
+            if not text:
+                if content_handler.startElementNS is None:
+                    raise ValueError("Empty file.")
+                else:
+                    raise ValueError("XML file contains no data.")
+            parser.feed(text)
+            seqXMLversion = content_handler.seqXMLversion
+            if seqXMLversion is not None:
+                break
+        self.seqXMLversion = seqXMLversion
+        self.source = content_handler.source
+        self.sourceVersion = content_handler.sourceVersion
+        self.ncbiTaxID = content_handler.ncbiTaxID
+        self.speciesName = content_handler.speciesName
+        records = self.iterate(handle)
+        return records
+
+    def iterate(self, handle):
+        """Iterate over the records in the XML file."""
+        parser = self.parser
+        content_handler = parser.getContentHandler()
+        records = content_handler.records
+        BLOCK = self.BLOCK
+        while True:
+            if len(records) > 1:
+                # Then at least the first record is finished
+                record = records.pop(0)
+                yield record
+            # Read in another block of the file...
+            text = handle.read(BLOCK)
+            if not text:
+                break
+            parser.feed(text)
+        # We have reached the end of the XML file;
+        # send out the remaining records
+        yield from records
+        records.clear()
+        parser.close()
+
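+# Example usage (illustrative): the iterator is normally reached through the
+# Bio.SeqIO API rather than instantiated directly; "example.xml" stands for
+# any SeqXML file:
+#
+#     from Bio import SeqIO
+#     for record in SeqIO.parse("example.xml", "seqxml"):
+#         print(record.id, record.annotations.get("molecule_type"))
+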
+
+class SeqXmlWriter(SequenceWriter):
+    """Writes SeqRecords into seqXML file.
+
+    SeqXML requires the SeqRecord annotations to specify the molecule_type;
+    the molecule type is required to contain the term "DNA", "RNA", or
+    "protein".
+    """
+
+    def __init__(
+        self, target, source=None, source_version=None, species=None, ncbiTaxId=None
+    ):
+        """Create Object and start the xml generator.
+
+        Arguments:
+         - target - Output stream opened in binary mode, or a path to a file.
+         - source - The source program/database of the file, for example
+           UniProt.
+         - source_version - The version or release number of the source
+           program or database from which the data originated.
+         - species - The scientific name of the species of origin of all
+           entries in the file.
+         - ncbiTaxId - The NCBI taxonomy identifier of the species of origin.
+
+        """
+        super().__init__(target, "wb")
+        handle = self.handle
+        self.xml_generator = XMLGenerator(handle, "utf-8")
+        self.xml_generator.startDocument()
+        self.source = source
+        self.source_version = source_version
+        self.species = species
+        self.ncbiTaxId = ncbiTaxId
+
+    def write_header(self):
+        """Write root node with document metadata."""
+        attrs = {
+            "xmlns:xsi": "http://www.w3.org/2001/XMLSchema-instance",
+            "xsi:noNamespaceSchemaLocation": "http://www.seqxml.org/0.4/seqxml.xsd",
+            "seqXMLversion": "0.4",
+        }
+
+        if self.source is not None:
+            attrs["source"] = self.source
+        if self.source_version is not None:
+            attrs["sourceVersion"] = self.source_version
+        if self.species is not None:
+            if not isinstance(self.species, str):
+                raise TypeError("species should be of type string")
+            attrs["speciesName"] = self.species
+        if self.ncbiTaxId is not None:
+            if not isinstance(self.ncbiTaxId, (str, int)):
+                raise TypeError("ncbiTaxID should be of type string or int")
+            attrs["ncbiTaxID"] = self.ncbiTaxId
+
+        self.xml_generator.startElement("seqXML", AttributesImpl(attrs))
+
+    def write_record(self, record):
+        """Write one record."""
+        if not record.id:
+            raise ValueError("SeqXML requires identifier")
+
+        if not isinstance(record.id, str):
+            raise TypeError("Identifier should be of type string")
+
+        attrb = {"id": record.id}
+
+        if (
+            "source" in record.annotations
+            and self.source != record.annotations["source"]
+        ):
+            if not isinstance(record.annotations["source"], str):
+                raise TypeError("source should be of type string")
+            attrb["source"] = record.annotations["source"]
+
+        self.xml_generator.startElement("entry", AttributesImpl(attrb))
+        self._write_species(record)
+        self._write_description(record)
+        self._write_seq(record)
+        self._write_dbxrefs(record)
+        self._write_properties(record)
+        self.xml_generator.endElement("entry")
+
+    def write_footer(self):
+        """Close the root node and finish the XML document."""
+        self.xml_generator.endElement("seqXML")
+        self.xml_generator.endDocument()
+
+    def _write_species(self, record):
+        """Write the species if given (PRIVATE)."""
+        local_ncbi_taxid = None
+        if "ncbi_taxid" in record.annotations:
+            local_ncbi_taxid = record.annotations["ncbi_taxid"]
+            if isinstance(local_ncbi_taxid, list):
+                # SwissProt parser uses a list (which could cope with chimeras)
+                if len(local_ncbi_taxid) == 1:
+                    local_ncbi_taxid = local_ncbi_taxid[0]
+                elif len(local_ncbi_taxid) == 0:
+                    local_ncbi_taxid = None
+                else:
+                    raise ValueError(
+                        "Multiple entries for record.annotations['ncbi_taxid'], %r"
+                        % local_ncbi_taxid
+                    )
+        if "organism" in record.annotations and local_ncbi_taxid:
+            local_org = record.annotations["organism"]
+
+            if not isinstance(local_org, str):
+                raise TypeError("organism should be of type string")
+
+            if not isinstance(local_ncbi_taxid, (str, int)):
+                raise TypeError("ncbiTaxID should be of type string or int")
+
+            # The local species definition is only written if it differs from the global species definition
+            if local_org != self.species or local_ncbi_taxid != self.ncbiTaxId:
+
+                attr = {"name": local_org, "ncbiTaxID": str(local_ncbi_taxid)}
+                self.xml_generator.startElement("species", AttributesImpl(attr))
+                self.xml_generator.endElement("species")
+
+    def _write_description(self, record):
+        """Write the description if given (PRIVATE)."""
+        if record.description:
+
+            if not isinstance(record.description, str):
+                raise TypeError("Description should be of type string")
+
+            self.xml_generator.startElement("description", AttributesImpl({}))
+            self.xml_generator.characters(record.description)
+            self.xml_generator.endElement("description")
+
+    def _write_seq(self, record):
+        """Write the sequence (PRIVATE).
+
+        Note that SeqXML requires the molecule type to contain the term
+        "DNA", "RNA", or "protein".
+        """
+        if isinstance(record.seq, UnknownSeq):
+            raise TypeError("Sequence type is UnknownSeq but SeqXML requires sequence")
+
+        seq = bytes(record.seq)
+
+        if not len(seq) > 0:
+            raise ValueError("The sequence length should be greater than 0")
+
+        molecule_type = record.annotations.get("molecule_type")
+        if molecule_type is None:
+            raise ValueError("molecule_type is not defined")
+        elif "DNA" in molecule_type:
+            seqElem = "DNAseq"
+        elif "RNA" in molecule_type:
+            seqElem = "RNAseq"
+        elif "protein" in molecule_type:
+            seqElem = "AAseq"
+        else:
+            raise ValueError("unknown molecule_type '%s'" % molecule_type)
+
+        self.xml_generator.startElement(seqElem, AttributesImpl({}))
+        self.xml_generator.characters(seq)
+        self.xml_generator.endElement(seqElem)
+
+    def _write_dbxrefs(self, record):
+        """Write all database cross references (PRIVATE)."""
+        if record.dbxrefs is not None:
+
+            for dbxref in record.dbxrefs:
+
+                if not isinstance(dbxref, str):
+                    raise TypeError("dbxrefs should be of type list of string")
+                if dbxref.find(":") < 1:
+                    raise ValueError(
+                        "dbxrefs should be in the form ['source:id', 'source:id' ]"
+                    )
+
+                dbsource, dbid = dbxref.split(":", 1)
+
+                attr = {"source": dbsource, "id": dbid}
+                self.xml_generator.startElement("DBRef", AttributesImpl(attr))
+                self.xml_generator.endElement("DBRef")
+
+    def _write_properties(self, record):
+        """Write all annotations that are key value pairs with values of a primitive type or list of primitive types (PRIVATE)."""
+        for key, value in record.annotations.items():
+
+            if key not in ("organism", "ncbi_taxid", "source"):
+
+                if value is None:
+
+                    attr = {"name": key}
+                    self.xml_generator.startElement("property", AttributesImpl(attr))
+                    self.xml_generator.endElement("property")
+
+                elif isinstance(value, list):
+
+                    for v in value:
+                        if v is None:
+                            attr = {"name": key}
+                        else:
+                            attr = {"name": key, "value": str(v)}
+                        self.xml_generator.startElement(
+                            "property", AttributesImpl(attr)
+                        )
+                        self.xml_generator.endElement("property")
+
+                elif isinstance(value, (int, float, str)):
+
+                    attr = {"name": key, "value": str(value)}
+                    self.xml_generator.startElement("property", AttributesImpl(attr))
+                    self.xml_generator.endElement("property")
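+
+
+# Example usage (illustrative): as with reading, writing is normally driven
+# through the Bio.SeqIO API, which calls write_header/write_record/write_footer
+# in turn; "records" stands for any iterable of SeqRecord objects that have
+# molecule_type set in their annotations:
+#
+#     from Bio import SeqIO
+#     SeqIO.write(records, "out.xml", "seqxml")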
diff --git a/code/lib/Bio/SeqIO/SffIO.py b/code/lib/Bio/SeqIO/SffIO.py
new file mode 100644
index 0000000..18d3ab1
--- /dev/null
+++ b/code/lib/Bio/SeqIO/SffIO.py
@@ -0,0 +1,1494 @@
+# Copyright 2009-2020 by Peter Cock.  All rights reserved.
+# Based on code contributed and copyright 2009 by Jose Blanca (COMAV-UPV).
+#
+# This code is part of the Biopython distribution and governed by its
+# license.  Please see the LICENSE file that should have been included
+# as part of this package.
+"""Bio.SeqIO support for the binary Standard Flowgram Format (SFF) file format.
+
+SFF was designed by 454 Life Sciences (Roche), the Whitehead Institute for
+Biomedical Research and the Wellcome Trust Sanger Institute. SFF was also used
+as the native output format from early versions of Ion Torrent's PGM platform.
+You are expected to use this module via the Bio.SeqIO functions under
+the format name "sff" (or "sff-trim" as described below).
+
+For example, to iterate over the records in an SFF file,
+
+    >>> from Bio import SeqIO
+    >>> for record in SeqIO.parse("Roche/E3MFGYR02_random_10_reads.sff", "sff"):
+    ...     print("%s %i %s..." % (record.id, len(record), record.seq[:20]))
+    ...
+    E3MFGYR02JWQ7T 265 tcagGGTCTACATGTTGGTT...
+    E3MFGYR02JA6IL 271 tcagTTTTTTTTGGAAAGGA...
+    E3MFGYR02JHD4H 310 tcagAAAGACAAGTGGTATC...
+    E3MFGYR02GFKUC 299 tcagCGGCCGGGCCTCTCAT...
+    E3MFGYR02FTGED 281 tcagTGGTAATGGGGGGAAA...
+    E3MFGYR02FR9G7 261 tcagCTCCGTAAGAAGGTGC...
+    E3MFGYR02GAZMS 278 tcagAAAGAAGTAAGGTAAA...
+    E3MFGYR02HHZ8O 221 tcagACTTTCTTCTTTACCG...
+    E3MFGYR02GPGB1 269 tcagAAGCAGTGGTATCAAC...
+    E3MFGYR02F7Z7G 219 tcagAATCATCCACTTTTTA...
+
+Each SeqRecord object will contain all the annotation from the SFF file,
+including the PHRED quality scores.
+
+    >>> print("%s %i" % (record.id, len(record)))
+    E3MFGYR02F7Z7G 219
+    >>> print("%s..." % record.seq[:10])
+    tcagAATCAT...
+    >>> print("%r..." % (record.letter_annotations["phred_quality"][:10]))
+    [22, 21, 23, 28, 26, 15, 12, 21, 28, 21]...
+
+Notice that the sequence is given in mixed case, the central upper case region
+corresponds to the trimmed sequence. This matches the output of the Roche
+tools (and the 3rd party tool sff_extract) for SFF to FASTA.
+
+    >>> print(record.annotations["clip_qual_left"])
+    4
+    >>> print(record.annotations["clip_qual_right"])
+    134
+    >>> print(record.seq[:4])
+    tcag
+    >>> print("%s...%s" % (record.seq[4:20], record.seq[120:134]))
+    AATCATCCACTTTTTA...CAAAACACAAACAG
+    >>> print(record.seq[134:])
+    atcttatcaacaaaactcaaagttcctaactgagacacgcaacaggggataagacaaggcacacaggggataggnnnnnnnnnnn
+
+The annotations dictionary also contains any adapter clip positions
+(usually zero), and information about the flows. e.g.
+
+    >>> len(record.annotations)
+    12
+    >>> print(record.annotations["flow_key"])
+    TCAG
+    >>> print(record.annotations["flow_values"][:10])
+    (83, 1, 128, 7, 4, 84, 6, 106, 3, 172)
+    >>> print(len(record.annotations["flow_values"]))
+    400
+    >>> print(record.annotations["flow_index"][:10])
+    (1, 2, 3, 2, 2, 0, 3, 2, 3, 3)
+    >>> print(len(record.annotations["flow_index"]))
+    219
+
+Note that to convert from a raw reading in flow_values to the corresponding
+homopolymer stretch estimate, the value should be rounded to the nearest 100:
+
+    >>> print("%r..." % [int(round(value, -2)) // 100
+    ...                  for value in record.annotations["flow_values"][:10]])
+    ...
+    [1, 0, 1, 0, 0, 1, 0, 1, 0, 2]...
+
+If a read name is exactly 14 alphanumeric characters, the annotations
+dictionary will also contain meta-data about the read extracted by
+interpreting the name as a 454 Sequencing System "Universal" Accession
+Number. Note that if a read name happens to be exactly 14 alphanumeric
+characters but was not generated automatically, these annotation records
+will contain nonsense information.
+
+    >>> print(record.annotations["region"])
+    2
+    >>> print(record.annotations["time"])
+    [2008, 1, 9, 16, 16, 0]
+    >>> print(record.annotations["coords"])
+    (2434, 1658)
+
+As a convenience method, you can read the file with SeqIO format name "sff-trim"
+instead of "sff" to get just the trimmed sequences (without any annotation
+except for the PHRED quality scores and anything encoded in the read names):
+
+    >>> from Bio import SeqIO
+    >>> for record in SeqIO.parse("Roche/E3MFGYR02_random_10_reads.sff", "sff-trim"):
+    ...     print("%s %i %s..." % (record.id, len(record), record.seq[:20]))
+    ...
+    E3MFGYR02JWQ7T 260 GGTCTACATGTTGGTTAACC...
+    E3MFGYR02JA6IL 265 TTTTTTTTGGAAAGGAAAAC...
+    E3MFGYR02JHD4H 292 AAAGACAAGTGGTATCAACG...
+    E3MFGYR02GFKUC 295 CGGCCGGGCCTCTCATCGGT...
+    E3MFGYR02FTGED 277 TGGTAATGGGGGGAAATTTA...
+    E3MFGYR02FR9G7 256 CTCCGTAAGAAGGTGCTGCC...
+    E3MFGYR02GAZMS 271 AAAGAAGTAAGGTAAATAAC...
+    E3MFGYR02HHZ8O 150 ACTTTCTTCTTTACCGTAAC...
+    E3MFGYR02GPGB1 221 AAGCAGTGGTATCAACGCAG...
+    E3MFGYR02F7Z7G 130 AATCATCCACTTTTTAACGT...
+
+Looking at the final record in more detail, note how this differs to the
+example above:
+
+    >>> print("%s %i" % (record.id, len(record)))
+    E3MFGYR02F7Z7G 130
+    >>> print("%s..." % record.seq[:10])
+    AATCATCCAC...
+    >>> print("%r..." % record.letter_annotations["phred_quality"][:10])
+    [26, 15, 12, 21, 28, 21, 36, 28, 27, 27]...
+    >>> len(record.annotations)
+    4
+    >>> print(record.annotations["region"])
+    2
+    >>> print(record.annotations["coords"])
+    (2434, 1658)
+    >>> print(record.annotations["time"])
+    [2008, 1, 9, 16, 16, 0]
+    >>> print(record.annotations["molecule_type"])
+    DNA
+
+You might use the Bio.SeqIO.convert() function to convert the (trimmed) SFF
+reads into a FASTQ file (or a FASTA file and a QUAL file), e.g.
+
+    >>> from Bio import SeqIO
+    >>> from io import StringIO
+    >>> out_handle = StringIO()
+    >>> count = SeqIO.convert("Roche/E3MFGYR02_random_10_reads.sff", "sff",
+    ...                       out_handle, "fastq")
+    ...
+    >>> print("Converted %i records" % count)
+    Converted 10 records
+
+The output FASTQ file would start like this:
+
+    >>> print("%s..." % out_handle.getvalue()[:50])
+    @E3MFGYR02JWQ7T
+    tcagGGTCTACATGTTGGTTAACCCGTACTGATT...
+
+Bio.SeqIO.index() provides memory-efficient random access to the reads in an
+SFF file by name. SFF files can include an index within the file, which can
+be read in, making this very fast. If the index is missing (or in a format not
+yet supported in Biopython) the file is indexed by scanning all the reads -
+which is a little slower. For example,
+
+    >>> from Bio import SeqIO
+    >>> reads = SeqIO.index("Roche/E3MFGYR02_random_10_reads.sff", "sff")
+    >>> record = reads["E3MFGYR02JHD4H"]
+    >>> print("%s %i %s..." % (record.id, len(record), record.seq[:20]))
+    E3MFGYR02JHD4H 310 tcagAAAGACAAGTGGTATC...
+    >>> reads.close()
+
+Or, using the trimmed reads:
+
+    >>> from Bio import SeqIO
+    >>> reads = SeqIO.index("Roche/E3MFGYR02_random_10_reads.sff", "sff-trim")
+    >>> record = reads["E3MFGYR02JHD4H"]
+    >>> print("%s %i %s..." % (record.id, len(record), record.seq[:20]))
+    E3MFGYR02JHD4H 292 AAAGACAAGTGGTATCAACG...
+    >>> reads.close()
+
+You can also use the Bio.SeqIO.write() function with the "sff" format. Note
+that this requires all the flow information etc, and thus is probably only
+useful for SeqRecord objects originally from reading another SFF file (and
+not the trimmed SeqRecord objects from parsing an SFF file as "sff-trim").
+
+As an example, let's pretend this example SFF file represents some DNA which
+was pre-amplified with a PCR primer AAAGANNNNN. The following script would
+produce a sub-file containing all those reads whose post-quality clipping
+region (i.e. the sequence after trimming) starts with AAAGA exactly (the non-
+degenerate bit of this pretend primer):
+
+    >>> from Bio import SeqIO
+    >>> records = (record for record in
+    ...            SeqIO.parse("Roche/E3MFGYR02_random_10_reads.sff", "sff")
+    ...            if record.seq[record.annotations["clip_qual_left"]:].startswith("AAAGA"))
+    ...
+    >>> count = SeqIO.write(records, "temp_filtered.sff", "sff")
+    >>> print("Selected %i records" % count)
+    Selected 2 records
+
+Of course, for an assembly you would probably want to remove these primers.
+If you want FASTA or FASTQ output, you could just slice the SeqRecord. However,
+if you want SFF output we have to preserve all the flow information - the trick
+is just to adjust the left clip position!
+
+    >>> from Bio import SeqIO
+    >>> def filter_and_trim(records, primer):
+    ...     for record in records:
+    ...         if record.seq[record.annotations["clip_qual_left"]:].startswith(primer):
+    ...             record.annotations["clip_qual_left"] += len(primer)
+    ...             yield record
+    ...
+    >>> records = SeqIO.parse("Roche/E3MFGYR02_random_10_reads.sff", "sff")
+    >>> count = SeqIO.write(filter_and_trim(records, "AAAGA"),
+    ...                     "temp_filtered.sff", "sff")
+    ...
+    >>> print("Selected %i records" % count)
+    Selected 2 records
+
+We can check the results, note the lower case clipped region now includes the "AAAGA"
+sequence:
+
+    >>> for record in SeqIO.parse("temp_filtered.sff", "sff"):
+    ...     print("%s %i %s..." % (record.id, len(record), record.seq[:20]))
+    ...
+    E3MFGYR02JHD4H 310 tcagaaagaCAAGTGGTATC...
+    E3MFGYR02GAZMS 278 tcagaaagaAGTAAGGTAAA...
+    >>> for record in SeqIO.parse("temp_filtered.sff", "sff-trim"):
+    ...     print("%s %i %s..." % (record.id, len(record), record.seq[:20]))
+    ...
+    E3MFGYR02JHD4H 287 CAAGTGGTATCAACGCAGAG...
+    E3MFGYR02GAZMS 266 AGTAAGGTAAATAACAAACG...
+    >>> import os
+    >>> os.remove("temp_filtered.sff")
+
+For a description of the file format, please see the Roche manuals and:
+http://www.ncbi.nlm.nih.gov/Traces/trace.cgi?cmd=show&f=formats&m=doc&s=formats
+
+"""
+import re
+import struct
+
+from Bio import StreamModeError
+from Bio.Seq import Seq
+from Bio.SeqRecord import SeqRecord
+
+from .Interfaces import SequenceIterator
+from .Interfaces import SequenceWriter
+
+
+_null = b"\0"
+_sff = b".sff"
+_hsh = b".hsh"
+_srt = b".srt"
+_mft = b".mft"
+_flag = b"\xff"
+
+
+def _sff_file_header(handle):
+    """Read in an SFF file header (PRIVATE).
+
+    Assumes the handle is at the start of the file, will read forwards
+    through the header and leave the handle pointing at the first record.
+    Returns a tuple of values from the header (header_length, index_offset,
+    index_length, number_of_reads, flows_per_read, flow_chars, key_sequence)
+
+    >>> with open("Roche/greek.sff", "rb") as handle:
+    ...     values = _sff_file_header(handle)
+    ...
+    >>> print(values[0])
+    840
+    >>> print(values[1])
+    65040
+    >>> print(values[2])
+    256
+    >>> print(values[3])
+    24
+    >>> print(values[4])
+    800
+    >>> values[-1]
+    'TCAG'
+
+    """
+    # file header (part one)
+    # use big endian encoding    >
+    # magic_number               I
+    # version                    4B
+    # index_offset               Q
+    # index_length               I
+    # number_of_reads            I
+    # header_length              H
+    # key_length                 H
+    # number_of_flows_per_read   H
+    # flowgram_format_code       B
+    # [rest of file header depends on the number of flows and how many keys]
+    fmt = ">4s4BQIIHHHB"
+    assert 31 == struct.calcsize(fmt)
+    data = handle.read(31)
+    if not data:
+        raise ValueError("Empty file.")
+    elif len(data) < 31:
+        raise ValueError("File too small to hold a valid SFF header.")
+    try:
+        (
+            magic_number,
+            ver0,
+            ver1,
+            ver2,
+            ver3,
+            index_offset,
+            index_length,
+            number_of_reads,
+            header_length,
+            key_length,
+            number_of_flows_per_read,
+            flowgram_format,
+        ) = struct.unpack(fmt, data)
+    except TypeError:
+        raise StreamModeError("SFF files must be opened in binary mode.") from None
+    if magic_number in [_hsh, _srt, _mft]:
+        # Probably user error, calling Bio.SeqIO.parse() twice!
+        raise ValueError("Handle seems to be at SFF index block, not start")
+    if magic_number != _sff:  # 779314790
+        raise ValueError("SFF file did not start '.sff', but %r" % magic_number)
+    if (ver0, ver1, ver2, ver3) != (0, 0, 0, 1):
+        raise ValueError(
+            "Unsupported SFF version in header, %i.%i.%i.%i" % (ver0, ver1, ver2, ver3)
+        )
+    if flowgram_format != 1:
+        raise ValueError("Flowgram format code %i not supported" % flowgram_format)
+    if (index_offset != 0) ^ (index_length != 0):
+        raise ValueError(
+            "Index offset %i but index length %i" % (index_offset, index_length)
+        )
+    flow_chars = handle.read(number_of_flows_per_read).decode("ASCII")
+    key_sequence = handle.read(key_length).decode("ASCII")
+    # According to the spec, the header_length field should be the total number
+    # of bytes required by this set of header fields, and should be equal to
+    # "31 + number_of_flows_per_read + key_length" rounded up to the next value
+    # divisible by 8.
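+    # For example, with the 800 flows and the 4 base key "TCAG" of the doctest
+    # above: 31 + 800 + 4 = 835, which rounds up to a header_length of 840.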
+    assert header_length % 8 == 0
+    padding = header_length - number_of_flows_per_read - key_length - 31
+    assert 0 <= padding < 8, padding
+    if handle.read(padding).count(_null) != padding:
+        import warnings
+        from Bio import BiopythonParserWarning
+
+        warnings.warn(
+            "Your SFF file is invalid, post header %i byte "
+            "null padding region contained data." % padding,
+            BiopythonParserWarning,
+        )
+    return (
+        header_length,
+        index_offset,
+        index_length,
+        number_of_reads,
+        number_of_flows_per_read,
+        flow_chars,
+        key_sequence,
+    )
+
+
+def _sff_do_slow_index(handle):
+    """Generate an index by scanning though all the reads in an SFF file (PRIVATE).
+
+    This is a slow but generic approach if we can't parse the provided index
+    (if present).
+
+    Will use the handle seek/tell functions.
+    """
+    handle.seek(0)
+    (
+        header_length,
+        index_offset,
+        index_length,
+        number_of_reads,
+        number_of_flows_per_read,
+        flow_chars,
+        key_sequence,
+    ) = _sff_file_header(handle)
+    # Now on to the reads...
+    read_header_fmt = ">2HI4H"
+    read_header_size = struct.calcsize(read_header_fmt)
+    # NOTE - assuming flowgram_format==1, which means struct type H
+    read_flow_fmt = ">%iH" % number_of_flows_per_read
+    read_flow_size = struct.calcsize(read_flow_fmt)
+    assert 1 == struct.calcsize(">B")
+    assert 1 == struct.calcsize(">s")
+    assert 1 == struct.calcsize(">c")
+    assert read_header_size % 8 == 0  # Important for padding calc later!
+    for read in range(number_of_reads):
+        record_offset = handle.tell()
+        if record_offset == index_offset:
+            # Found index block within reads, ignore it:
+            offset = index_offset + index_length
+            if offset % 8:
+                offset += 8 - (offset % 8)
+            assert offset % 8 == 0
+            handle.seek(offset)
+            record_offset = offset
+        # assert record_offset%8 == 0 # Worth checking, but slow
+        # First the fixed header
+        data = handle.read(read_header_size)
+        (
+            read_header_length,
+            name_length,
+            seq_len,
+            clip_qual_left,
+            clip_qual_right,
+            clip_adapter_left,
+            clip_adapter_right,
+        ) = struct.unpack(read_header_fmt, data)
+        if read_header_length < 10 or read_header_length % 8 != 0:
+            raise ValueError(
+                "Malformed read header, says length is %i:\n%r"
+                % (read_header_length, data)
+            )
+        # now the name and any padding (remainder of header)
+        name = handle.read(name_length).decode()
+        padding = read_header_length - read_header_size - name_length
+        if handle.read(padding).count(_null) != padding:
+            import warnings
+            from Bio import BiopythonParserWarning
+
+            warnings.warn(
+                "Your SFF file is invalid, post name %i byte "
+                "padding region contained data" % padding,
+                BiopythonParserWarning,
+            )
+        assert record_offset + read_header_length == handle.tell()
+        # now the flowgram values, flowgram index, bases and qualities
+        size = read_flow_size + 3 * seq_len
+        handle.seek(size, 1)
+        # now any padding...
+        padding = size % 8
+        if padding:
+            padding = 8 - padding
+            if handle.read(padding).count(_null) != padding:
+                import warnings
+                from Bio import BiopythonParserWarning
+
+                warnings.warn(
+                    "Your SFF file is invalid, post quality %i "
+                    "byte padding region contained data" % padding,
+                    BiopythonParserWarning,
+                )
+        # print("%s %s %i" % (read, name, record_offset))
+        yield name, record_offset
+    if handle.tell() % 8 != 0:
+        raise ValueError("After scanning reads, did not end on a multiple of 8")
+
+
+def _sff_find_roche_index(handle):
+    """Locate any existing Roche style XML meta data and read index (PRIVATE).
+
+    Makes a number of hard-coded assumptions based on reverse-engineered SFF
+    files from Roche 454 machines.
+
+    Returns a tuple of read count, SFF "index" offset and size, XML offset
+    and size, and the actual read index offset and size.
+
+    Raises a ValueError for unsupported or non-Roche index blocks.
+    """
+    handle.seek(0)
+    (
+        header_length,
+        index_offset,
+        index_length,
+        number_of_reads,
+        number_of_flows_per_read,
+        flow_chars,
+        key_sequence,
+    ) = _sff_file_header(handle)
+    assert handle.tell() == header_length
+    if not index_offset or not index_length:
+        raise ValueError("No index present in this SFF file")
+    # Now jump to the header...
+    handle.seek(index_offset)
+    fmt = ">4s4B"
+    fmt_size = struct.calcsize(fmt)
+    data = handle.read(fmt_size)
+    if not data:
+        raise ValueError(
+            "Premature end of file? Expected index of size %i at offest %i, found nothing"
+            % (index_length, index_offset)
+        )
+    if len(data) < fmt_size:
+        raise ValueError(
+            "Premature end of file? Expected index of size %i at offest %i, found %r"
+            % (index_length, index_offset, data)
+        )
+    magic_number, ver0, ver1, ver2, ver3 = struct.unpack(fmt, data)
+    if magic_number == _mft:  # 778921588
+        # Roche 454 manifest index
+        # This is typical from raw Roche 454 SFF files (2009), and includes
+        # both an XML manifest and the sorted index.
+        if (ver0, ver1, ver2, ver3) != (49, 46, 48, 48):
+            # This is "1.00" as a string
+            raise ValueError(
+                "Unsupported version in .mft index header, %i.%i.%i.%i"
+                % (ver0, ver1, ver2, ver3)
+            )
+        fmt2 = ">LL"
+        fmt2_size = struct.calcsize(fmt2)
+        xml_size, data_size = struct.unpack(fmt2, handle.read(fmt2_size))
+        if index_length != fmt_size + fmt2_size + xml_size + data_size:
+            raise ValueError(
+                "Problem understanding .mft index header, %i != %i + %i + %i + %i"
+                % (index_length, fmt_size, fmt2_size, xml_size, data_size)
+            )
+        return (
+            number_of_reads,
+            header_length,
+            index_offset,
+            index_length,
+            index_offset + fmt_size + fmt2_size,
+            xml_size,
+            index_offset + fmt_size + fmt2_size + xml_size,
+            data_size,
+        )
+    elif magic_number == _srt:  # 779317876
+        # Roche 454 sorted index
+        # I've had this from the Roche tool sfffile when the read identifiers
+        # had nonstandard lengths and there was no XML manifest.
+        if (ver0, ver1, ver2, ver3) != (49, 46, 48, 48):
+            # This is "1.00" as a string
+            raise ValueError(
+                "Unsupported version in .srt index header, %i.%i.%i.%i"
+                % (ver0, ver1, ver2, ver3)
+            )
+        data = handle.read(4)
+        if data != _null * 4:
+            raise ValueError("Did not find expected null four bytes in .srt index")
+        return (
+            number_of_reads,
+            header_length,
+            index_offset,
+            index_length,
+            0,
+            0,
+            index_offset + fmt_size + 4,
+            index_length - fmt_size - 4,
+        )
+    elif magic_number == _hsh:
+        raise ValueError(
+            "Hash table style indexes (.hsh) in SFF files are not (yet) supported"
+        )
+    else:
+        raise ValueError(
+            "Unknown magic number %r in SFF index header:\n%r" % (magic_number, data)
+        )
+
+
+def ReadRocheXmlManifest(handle):
+    """Read any Roche style XML manifest data in the SFF "index".
+
+    The SFF file format allows for multiple different index blocks, and Roche
+    took advantage of this to define their own index block which also embeds
+    an XML manifest string. This is not a publicly documented extension to
+    the SFF file format; it was reverse engineered.
+
+    The handle should be to an SFF file opened in binary mode. This function
+    will use the handle seek/tell functions and leave the handle in an
+    arbitrary location.
+
+    Any XML manifest found is returned as a Python string, which you can then
+    parse as appropriate, or reuse when writing out SFF files with the
+    SffWriter class.
+
+    Returns a string, or raises a ValueError if a Roche manifest could not be
+    found.
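+
+    A minimal usage sketch (the file name here is illustrative)::
+
+        with open("example.sff", "rb") as handle:
+            xml_string = ReadRocheXmlManifest(handle)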
+    """
+    (
+        number_of_reads,
+        header_length,
+        index_offset,
+        index_length,
+        xml_offset,
+        xml_size,
+        read_index_offset,
+        read_index_size,
+    ) = _sff_find_roche_index(handle)
+    if not xml_offset or not xml_size:
+        raise ValueError("No XML manifest found")
+    handle.seek(xml_offset)
+    return handle.read(xml_size).decode()
+
+
+# This is a generator function!
+def _sff_read_roche_index(handle):
+    """Read any existing Roche style read index provided in the SFF file (PRIVATE).
+
+    Will use the handle seek/tell functions.
+
+    This works on ".srt1.00" and ".mft1.00" style Roche SFF index blocks.
+
+    Roche SFF indices use base 255, not 256, meaning we see bytes only in
+    the range 0 to 254. This appears to be so that byte 0xFF (character 255)
+    can be used as a marker character to separate entries (required if the
+    read name lengths vary).
+
+    Note that since only four bytes are used for the read offset, this is
+    limited to 255^4 bytes (nearly 4GB). If you try to use the Roche sfffile
+    tool to combine SFF files beyond this limit, it issues a warning and
+    omits the index (and manifest).
+    """
+    (
+        number_of_reads,
+        header_length,
+        index_offset,
+        index_length,
+        xml_offset,
+        xml_size,
+        read_index_offset,
+        read_index_size,
+    ) = _sff_find_roche_index(handle)
+    # Now parse the read index...
+    handle.seek(read_index_offset)
+    fmt = ">5B"
+    for read in range(number_of_reads):
+        # TODO - Be more aware of when the index should end?
+        data = handle.read(6)
+        while True:
+            more = handle.read(1)
+            if not more:
+                raise ValueError("Premature end of file!")
+            data += more
+            if more == _flag:
+                break
+        assert data[-1:] == _flag, data[-1:]
+        name = data[:-6].decode()
+        off4, off3, off2, off1, off0 = struct.unpack(fmt, data[-6:-1])
+        offset = off0 + 255 * off1 + 65025 * off2 + 16581375 * off3
+        if off4:
+            # Could in theory be used as a fifth piece of offset information,
+            # i.e. offset += 4228250625 * off4, but testing with the Roche
+            # tools this is not the case. They simply don't support such
+            # large indexes.
+            raise ValueError("Expected a null terminator to the read name.")
+        yield name, offset
+    if handle.tell() != read_index_offset + read_index_size:
+        raise ValueError(
+            "Problem with index length? %i vs %i"
+            % (handle.tell(), read_index_offset + read_index_size)
+        )
+
+
+_valid_UAN_read_name = re.compile(r"^[a-zA-Z0-9]{14}$")
+
+
+def _sff_read_seq_record(
+    handle, number_of_flows_per_read, flow_chars, key_sequence, trim=False
+):
+    """Parse the next read in the file, return data as a SeqRecord (PRIVATE)."""
+    # Now on to the reads...
+    # the read header format (fixed part):
+    # read_header_length     H
+    # name_length            H
+    # seq_len                I
+    # clip_qual_left         H
+    # clip_qual_right        H
+    # clip_adapter_left      H
+    # clip_adapter_right     H
+    # [rest of read header depends on the name length etc]
+    read_header_fmt = ">2HI4H"
+    read_header_size = struct.calcsize(read_header_fmt)
+    read_flow_fmt = ">%iH" % number_of_flows_per_read
+    read_flow_size = struct.calcsize(read_flow_fmt)
+
+    (
+        read_header_length,
+        name_length,
+        seq_len,
+        clip_qual_left,
+        clip_qual_right,
+        clip_adapter_left,
+        clip_adapter_right,
+    ) = struct.unpack(read_header_fmt, handle.read(read_header_size))
+    if clip_qual_left:
+        clip_qual_left -= 1  # python counting
+    if clip_adapter_left:
+        clip_adapter_left -= 1  # python counting
+    if read_header_length < 10 or read_header_length % 8 != 0:
+        raise ValueError(
+            "Malformed read header, says length is %i" % read_header_length
+        )
+    # now the name and any padding (remainder of header)
+    name = handle.read(name_length).decode()
+    padding = read_header_length - read_header_size - name_length
+    if handle.read(padding).count(_null) != padding:
+        import warnings
+        from Bio import BiopythonParserWarning
+
+        warnings.warn(
+            "Your SFF file is invalid, post name %i "
+            "byte padding region contained data" % padding,
+            BiopythonParserWarning,
+        )
+    # now the flowgram values, flowgram index, bases and qualities
+    # NOTE - assuming flowgram_format==1, which means struct type H
+    flow_values = handle.read(read_flow_size)  # unpack later if needed
+    temp_fmt = ">%iB" % seq_len  # used for flow index and quals
+    flow_index = handle.read(seq_len)  # unpack later if needed
+    seq = handle.read(seq_len)  # Leave as bytes for Seq object
+    quals = list(struct.unpack(temp_fmt, handle.read(seq_len)))
+    # now any padding...
+    padding = (read_flow_size + seq_len * 3) % 8
+    if padding:
+        padding = 8 - padding
+        if handle.read(padding).count(_null) != padding:
+            import warnings
+            from Bio import BiopythonParserWarning
+
+            warnings.warn(
+                "Your SFF file is invalid, post quality %i "
+                "byte padding region contained data" % padding,
+                BiopythonParserWarning,
+            )
+    # Follow Roche and apply most aggressive of qual and adapter clipping.
+    # Note Roche seems to ignore adapter clip fields when writing SFF,
+    # and uses just the quality clipping values for any clipping.
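+    # For example (after the python-counting adjustment above):
+    # clip_qual_left=4 with clip_adapter_left=0 gives clip_left=4, and
+    # clip_qual_right=300 with clip_adapter_right=0 gives clip_right=300.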
+    clip_left = max(clip_qual_left, clip_adapter_left)
+    # Right clipping of zero means no clipping
+    if clip_qual_right:
+        if clip_adapter_right:
+            clip_right = min(clip_qual_right, clip_adapter_right)
+        else:
+            # Typical case with Roche SFF files
+            clip_right = clip_qual_right
+    elif clip_adapter_right:
+        clip_right = clip_adapter_right
+    else:
+        clip_right = seq_len
+    # Now build a SeqRecord
+    if trim:
+        if clip_left >= clip_right:
+            # Raise an error?
+            import warnings
+            from Bio import BiopythonParserWarning
+
+            warnings.warn(
+                "Overlapping clip values in SFF record, trimmed to nothing",
+                BiopythonParserWarning,
+            )
+            seq = ""
+            quals = []
+        else:
+            seq = seq[clip_left:clip_right].upper()
+            quals = quals[clip_left:clip_right]
+        # Don't record the clipping values, flow etc, as they make no sense now:
+        annotations = {}
+    else:
+        if clip_left >= clip_right:
+            import warnings
+            from Bio import BiopythonParserWarning
+
+            warnings.warn(
+                "Overlapping clip values in SFF record", BiopythonParserWarning
+            )
+            seq = seq.lower()
+        else:
+            # This use of mixed case mimics the Roche SFF tool's FASTA output
+            seq = (
+                seq[:clip_left].lower()
+                + seq[clip_left:clip_right].upper()
+                + seq[clip_right:].lower()
+            )
+        annotations = {
+            "flow_values": struct.unpack(read_flow_fmt, flow_values),
+            "flow_index": struct.unpack(temp_fmt, flow_index),
+            "flow_chars": flow_chars,
+            "flow_key": key_sequence,
+            "clip_qual_left": clip_qual_left,
+            "clip_qual_right": clip_qual_right,
+            "clip_adapter_left": clip_adapter_left,
+            "clip_adapter_right": clip_adapter_right,
+        }
+    if re.match(_valid_UAN_read_name, name):
+        annotations["time"] = _get_read_time(name)
+        annotations["region"] = _get_read_region(name)
+        annotations["coords"] = _get_read_xy(name)
+    annotations["molecule_type"] = "DNA"
+    record = SeqRecord(
+        Seq(seq), id=name, name=name, description="", annotations=annotations
+    )
+    # Dirty trick to speed up this line:
+    # record.letter_annotations["phred_quality"] = quals
+    dict.__setitem__(record._per_letter_annotations, "phred_quality", quals)
+    # Return the record and then continue...
+    return record
+
+
+_powers_of_36 = [36 ** i for i in range(6)]
+
+
+def _string_as_base_36(string):
+    """Interpret a string as a base-36 number as per 454 manual (PRIVATE)."""
+    total = 0
+    for c, power in zip(string[::-1], _powers_of_36):
+        # For reference: ord('0') = 48, ord('9') = 57
+        # For reference: ord('A') = 65, ord('Z') = 90
+        # For reference: ord('a') = 97, ord('z') = 122
+        if 48 <= ord(c) <= 57:
+            val = ord(c) - 22  # equivalent to: - ord('0') + 26
+        elif 65 <= ord(c) <= 90:
+            val = ord(c) - 65
+        elif 97 <= ord(c) <= 122:
+            val = ord(c) - 97
+        else:
+            # Invalid character
+            val = 0
+        total += val * power
+    return total
+
+
+def _get_read_xy(read_name):
+    """Extract coordinates from last 5 characters of read name (PRIVATE)."""
+    number = _string_as_base_36(read_name[9:])
+    return divmod(number, 4096)
+
+
+_time_denominators = [
+    13 * 32 * 24 * 60 * 60,
+    32 * 24 * 60 * 60,
+    24 * 60 * 60,
+    60 * 60,
+    60,
+]
+
+
+def _get_read_time(read_name):
+    """Extract time from first 6 characters of read name (PRIVATE)."""
+    time_list = []
+    remainder = _string_as_base_36(read_name[:6])
+    for denominator in _time_denominators:
+        this_term, remainder = divmod(remainder, denominator)
+        time_list.append(this_term)
+    time_list.append(remainder)
+    time_list[0] += 2000
+    return time_list
+
+
+def _get_read_region(read_name):
+    """Extract region from read name (PRIVATE)."""
+    return int(read_name[8])
+
+
+def _sff_read_raw_record(handle, number_of_flows_per_read):
+    """Extract the next read in the file as a raw (bytes) string (PRIVATE)."""
+    read_header_fmt = ">2HI"
+    read_header_size = struct.calcsize(read_header_fmt)
+    read_flow_fmt = ">%iH" % number_of_flows_per_read
+    read_flow_size = struct.calcsize(read_flow_fmt)
+
+    raw = handle.read(read_header_size)
+    read_header_length, name_length, seq_len = struct.unpack(read_header_fmt, raw)
+    if read_header_length < 10 or read_header_length % 8 != 0:
+        raise ValueError(
+            "Malformed read header, says length is %i" % read_header_length
+        )
+    # now the four clip values (4H = 8 bytes), and read name
+    raw += handle.read(8 + name_length)
+    # and any padding (remainder of header)
+    padding = read_header_length - read_header_size - 8 - name_length
+    pad = handle.read(padding)
+    if pad.count(_null) != padding:
+        import warnings
+        from Bio import BiopythonParserWarning
+
+        warnings.warn(
+            "Your SFF file is invalid, post name %i "
+            "byte padding region contained data" % padding,
+            BiopythonParserWarning,
+        )
+    raw += pad
+    # now the flowgram values, flowgram index, bases and qualities
+    raw += handle.read(read_flow_size + seq_len * 3)
+    padding = (read_flow_size + seq_len * 3) % 8
+    # now any padding...
+    if padding:
+        padding = 8 - padding
+        pad = handle.read(padding)
+        if pad.count(_null) != padding:
+            import warnings
+            from Bio import BiopythonParserWarning
+
+            warnings.warn(
+                "Your SFF file is invalid, post quality %i "
+                "byte padding region contained data" % padding,
+                BiopythonParserWarning,
+            )
+        raw += pad
+    # Return the raw bytes
+    return raw
+
+
+class _AddTellHandle:
+    """Wrapper for handles which do not support the tell method (PRIVATE).
+
+    Intended for use with things like network handles where tell (and reverse
+    seek) are not supported. The SFF file needs to track the current offset in
+    order to deal with the index block.
+    """
+
+    def __init__(self, handle):
+        self._handle = handle
+        self._offset = 0
+
+    def read(self, length):
+        data = self._handle.read(length)
+        self._offset += len(data)
+        return data
+
+    def tell(self):
+        return self._offset
+
+    def seek(self, offset):
+        # Forward-only seek implemented by reading and discarding bytes;
+        # record the new position so tell() stays accurate.
+        if offset < self._offset:
+            raise RuntimeError("Can't seek backwards")
+        self._handle.read(offset - self._offset)
+        self._offset = offset
+
+    def close(self):
+        return self._handle.close()
+
+
+class SffIterator(SequenceIterator):
+    """Parser for Standard Flowgram Format (SFF) files."""
+
+    def __init__(self, source, alphabet=None, trim=False):
+        """Iterate over Standard Flowgram Format (SFF) reads (as SeqRecord objects).
+
+            - source - path to an SFF file, e.g. from Roche 454 sequencing,
+              or a file-like object opened in binary mode.
+            - alphabet - optional alphabet, unused. Leave as None.
+            - trim - should the sequences be trimmed?
+
+        The resulting SeqRecord objects should match those from a paired FASTA
+        and QUAL file converted from the SFF file using the Roche 454 tool
+        sffinfo, i.e. the sequence will be mixed case, with the trim regions
+        shown in lower case.
+
+        This function is used internally via the Bio.SeqIO functions:
+
+        >>> from Bio import SeqIO
+        >>> for record in SeqIO.parse("Roche/E3MFGYR02_random_10_reads.sff", "sff"):
+        ...     print("%s %i" % (record.id, len(record)))
+        ...
+        E3MFGYR02JWQ7T 265
+        E3MFGYR02JA6IL 271
+        E3MFGYR02JHD4H 310
+        E3MFGYR02GFKUC 299
+        E3MFGYR02FTGED 281
+        E3MFGYR02FR9G7 261
+        E3MFGYR02GAZMS 278
+        E3MFGYR02HHZ8O 221
+        E3MFGYR02GPGB1 269
+        E3MFGYR02F7Z7G 219
+
+        You can also call it directly:
+
+        >>> with open("Roche/E3MFGYR02_random_10_reads.sff", "rb") as handle:
+        ...     for record in SffIterator(handle):
+        ...         print("%s %i" % (record.id, len(record)))
+        ...
+        E3MFGYR02JWQ7T 265
+        E3MFGYR02JA6IL 271
+        E3MFGYR02JHD4H 310
+        E3MFGYR02GFKUC 299
+        E3MFGYR02FTGED 281
+        E3MFGYR02FR9G7 261
+        E3MFGYR02GAZMS 278
+        E3MFGYR02HHZ8O 221
+        E3MFGYR02GPGB1 269
+        E3MFGYR02F7Z7G 219
+
+        Or, with the trim option:
+
+        >>> with open("Roche/E3MFGYR02_random_10_reads.sff", "rb") as handle:
+        ...     for record in SffIterator(handle, trim=True):
+        ...         print("%s %i" % (record.id, len(record)))
+        ...
+        E3MFGYR02JWQ7T 260
+        E3MFGYR02JA6IL 265
+        E3MFGYR02JHD4H 292
+        E3MFGYR02GFKUC 295
+        E3MFGYR02FTGED 277
+        E3MFGYR02FR9G7 256
+        E3MFGYR02GAZMS 271
+        E3MFGYR02HHZ8O 150
+        E3MFGYR02GPGB1 221
+        E3MFGYR02F7Z7G 130
+
+        """
+        if alphabet is not None:
+            raise ValueError("The alphabet argument is no longer supported")
+        super().__init__(source, mode="b", fmt="SFF")
+        self.trim = trim
+
+    def parse(self, handle):
+        """Start parsing the file, and return a SeqRecord generator."""
+        try:
+            if 0 != handle.tell():
+                raise ValueError("Not at start of file, offset %i" % handle.tell())
+        except AttributeError:
+            # Probably a network handle or something like that
+            handle = _AddTellHandle(handle)
+        records = self.iterate(handle)
+        return records
+
+    def iterate(self, handle):
+        """Parse the file and generate SeqRecord objects."""
+        trim = self.trim
+        (
+            header_length,
+            index_offset,
+            index_length,
+            number_of_reads,
+            number_of_flows_per_read,
+            flow_chars,
+            key_sequence,
+        ) = _sff_file_header(handle)
+        # Now on to the reads...
+        # the read header format (fixed part):
+        # read_header_length     H
+        # name_length            H
+        # seq_len                I
+        # clip_qual_left         H
+        # clip_qual_right        H
+        # clip_adapter_left      H
+        # clip_adapter_right     H
+        # [rest of read header depends on the name length etc]
+        read_header_fmt = ">2HI4H"
+        read_header_size = struct.calcsize(read_header_fmt)
+        read_flow_fmt = ">%iH" % number_of_flows_per_read
+        read_flow_size = struct.calcsize(read_flow_fmt)
+        assert 1 == struct.calcsize(">B")
+        assert 1 == struct.calcsize(">s")
+        assert 1 == struct.calcsize(">c")
+        assert read_header_size % 8 == 0  # Important for padding calc later!
+        # The spec allows for the index block to be before or even in the middle
+        # of the reads. We can check that if we keep track of our position
+        # in the file...
+        for read in range(number_of_reads):
+            if index_offset and handle.tell() == index_offset:
+                offset = index_offset + index_length
+                if offset % 8:
+                    offset += 8 - (offset % 8)
+                assert offset % 8 == 0
+                handle.seek(offset)
+                # Now that we've done this, we don't need to do it again. Clear
+                # the index_offset so we can skip extra handle.tell() calls:
+                index_offset = 0
+            yield _sff_read_seq_record(
+                handle, number_of_flows_per_read, flow_chars, key_sequence, trim,
+            )
+        _check_eof(handle, index_offset, index_length)
+
+
+def _check_eof(handle, index_offset, index_length):
+    """Check final padding is OK (8 byte alignment) and file ends (PRIVATE).
+
+    Will attempt to spot apparent SFF file concatenation and give an error.
+
+    Will not attempt to seek, only moves the handle forward.
+    """
+    offset = handle.tell()
+    extra = b""
+    padding = 0
+
+    if index_offset and offset <= index_offset:
+        # Index block then end of file...
+        if offset < index_offset:
+            raise ValueError(
+                "Gap of %i bytes after final record end %i, "
+                "before %i where index starts?"
+                % (index_offset - offset, offset, index_offset)
+            )
+        # Doing read to jump the index rather than a seek
+        # in case this is a network handle or similar
+        handle.read(index_offset + index_length - offset)
+        offset = index_offset + index_length
+        if offset != handle.tell():
+            raise ValueError(
+                "Wanted %i, got %i, index is %i to %i"
+                % (offset, handle.tell(), index_offset, index_offset + index_length)
+            )
+
+    if offset % 8:
+        padding = 8 - (offset % 8)
+        extra = handle.read(padding)
+
+    if padding >= 4 and extra[-4:] == _sff:
+        # Seen this in one user supplied file, should have been
+        # four bytes of null padding but was actually .sff and
+        # the start of a new concatenated SFF file!
+        raise ValueError(
+            "Your SFF file is invalid, post index %i byte "
+            "null padding region ended '.sff' which could "
+            "be the start of a concatenated SFF file? "
+            "See offset %i" % (padding, offset)
+        )
+    if padding and not extra:
+        # TODO - Is this error harmless enough to just ignore?
+        import warnings
+        from Bio import BiopythonParserWarning
+
+        warnings.warn(
+            "Your SFF file is technically invalid as it is missing "
+            "a terminal %i byte null padding region." % padding,
+            BiopythonParserWarning,
+        )
+        return
+    if extra.count(_null) != padding:
+        import warnings
+        from Bio import BiopythonParserWarning
+
+        warnings.warn(
+            "Your SFF file is invalid, post index %i byte "
+            "null padding region contained data: %r" % (padding, extra),
+            BiopythonParserWarning,
+        )
+
+    offset = handle.tell()
+    if offset % 8 != 0:
+        raise ValueError("Wanted offset %i %% 8 = %i to be zero" % (offset, offset % 8))
+    # Should now be at the end of the file...
+    extra = handle.read(4)
+    if extra == _sff:
+        raise ValueError(
+            "Additional data at end of SFF file, "
+            "perhaps multiple SFF files concatenated? "
+            "See offset %i" % offset
+        )
+    elif extra:
+        raise ValueError("Additional data at end of SFF file, see offset %i" % offset)
+
+
+class _SffTrimIterator(SffIterator):
+    """Iterate over SFF reads (as SeqRecord objects) with trimming (PRIVATE)."""
+
+    def __init__(self, source):
+        super().__init__(source, trim=True)
+
+
+class SffWriter(SequenceWriter):
+    """SFF file writer."""
+
+    def __init__(self, target, index=True, xml=None):
+        """Initialize an SFF writer object.
+
+        Arguments:
+         - target - Output stream opened in binary mode, or a path to a file.
+         - index - Boolean argument, should we try and write an index?
+         - xml - Optional string argument, xml manifest to be recorded
+           in the index block (see function ReadRocheXmlManifest for
+           reading this data).
+
+        """
+        super().__init__(target, "wb")
+        self._xml = xml
+        if index:
+            self._index = []
+        else:
+            self._index = None
+
+    def write_file(self, records):
+        """Use this to write an entire file containing the given records."""
+        try:
+            self._number_of_reads = len(records)
+        except TypeError:
+            self._number_of_reads = 0  # dummy value
+            if not hasattr(self.handle, "seek") or not hasattr(self.handle, "tell"):
+                raise ValueError(
+                    "A handle with a seek/tell methods is required in order "
+                    "to record the total record count in the file header "
+                    "(once it is known at the end)."
+                ) from None
+        if self._index is not None and not (
+            hasattr(self.handle, "seek") and hasattr(self.handle, "tell")
+        ):
+            import warnings
+
+            warnings.warn(
+                "A handle with a seek/tell methods is required in "
+                "order to record an SFF index."
+            )
+            self._index = None
+        self._index_start = 0
+        self._index_length = 0
+        if not hasattr(records, "next"):
+            records = iter(records)
+        # Get the first record in order to find the flow information
+        # we will need for the header.
+        try:
+            record = next(records)
+        except StopIteration:
+            record = None
+        if record is None:
+            # No records -> empty SFF file (or an error)?
+            # We can't write a header without the flow information.
+            # return 0
+            raise ValueError("Must have at least one sequence")
+        try:
+            self._key_sequence = record.annotations["flow_key"].encode("ASCII")
+            self._flow_chars = record.annotations["flow_chars"].encode("ASCII")
+            self._number_of_flows_per_read = len(self._flow_chars)
+        except KeyError:
+            raise ValueError("Missing SFF flow information") from None
+        self.write_header()
+        self.write_record(record)
+        count = 1
+        for record in records:
+            self.write_record(record)
+            count += 1
+        if self._number_of_reads == 0:
+            # Must go back and record the record count...
+            offset = self.handle.tell()
+            self.handle.seek(0)
+            self._number_of_reads = count
+            self.write_header()
+            self.handle.seek(offset)  # not essential?
+        else:
+            assert count == self._number_of_reads
+        if self._index is not None:
+            self._write_index()
+        return count
+
+    def _write_index(self):
+        assert len(self._index) == self._number_of_reads
+        handle = self.handle
+        self._index.sort()
+        self._index_start = handle.tell()  # need for header
+        # XML...
+        if self._xml is not None:
+            xml = self._xml.encode()
+        else:
+            from Bio import __version__
+
+            xml = "\n" % __version__
+            xml += (
+                "\n"
+            )
+            xml += "\n"
+            xml = xml.encode()
+        xml_len = len(xml)
+        # Write to the file...
+        fmt = ">I4BLL"
+        fmt_size = struct.calcsize(fmt)
+        handle.write(_null * fmt_size + xml)  # fill this later
+        fmt2 = ">6B"
+        assert 6 == struct.calcsize(fmt2)
+        self._index.sort()
+        index_len = 0  # don't know yet!
+        for name, offset in self._index:
+            # Roche files record the offsets using base 255 not 256.
+            # See comments for parsing the index block. There may be a faster
+            # way to code this, but we can't easily use shifts due to odd base
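+            # (e.g. offset 300 is stored as the digit bytes
+            # (off3, off2, off1, off0) = (0, 0, 1, 45), since 1*255 + 45 = 300,
+            # followed by the 0xFF flag byte)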
+            off3 = offset
+            off0 = off3 % 255
+            off3 -= off0
+            off1 = off3 % 65025
+            off3 -= off1
+            off2 = off3 % 16581375
+            off3 -= off2
+            if offset != off0 + off1 + off2 + off3:
+                raise RuntimeError(
+                    "%i -> %i %i %i %i" % (offset, off0, off1, off2, off3)
+                )
+            off3, off2, off1, off0 = (
+                off3 // 16581375,
+                off2 // 65025,
+                off1 // 255,
+                off0,
+            )
+            if not (off0 < 255 and off1 < 255 and off2 < 255 and off3 < 255):
+                raise RuntimeError(
+                    "%i -> %i %i %i %i" % (offset, off0, off1, off2, off3)
+                )
+            handle.write(name + struct.pack(fmt2, 0, off3, off2, off1, off0, 255))
+            index_len += len(name) + 6
+        # Note any padding is not included:
+        self._index_length = fmt_size + xml_len + index_len  # need for header
+        # Pad out to an 8 byte boundary (although I have noticed some
+        # real Roche SFF files neglect to do this despite their manual
+        # suggesting this padding should be there):
+        if self._index_length % 8:
+            padding = 8 - (self._index_length % 8)
+            handle.write(_null * padding)
+        else:
+            padding = 0
+        offset = handle.tell()
+        if offset != self._index_start + self._index_length + padding:
+            raise RuntimeError(
+                "%i vs %i + %i + %i"
+                % (offset, self._index_start, self._index_length, padding)
+            )
+        # Must now go back and update the index header with index size...
+        handle.seek(self._index_start)
+        handle.write(
+            struct.pack(
+                fmt,
+                778921588,  # magic number
+                49,
+                46,
+                48,
+                48,  # Roche index version, "1.00"
+                xml_len,
+                index_len,
+            )
+            + xml
+        )
+        # Must now go back and update the header...
+        handle.seek(0)
+        self.write_header()
+        handle.seek(offset)  # not essential?
+
+    def write_header(self):
+        """Write the SFF file header."""
+        # Do header...
+        key_length = len(self._key_sequence)
+        # file header (part one)
+        # use big endian encoding     >
+        # magic_number               I
+        # version                    4B
+        # index_offset               Q
+        # index_length               I
+        # number_of_reads            I
+        # header_length              H
+        # key_length                 H
+        # number_of_flows_per_read   H
+        # flowgram_format_code       B
+        # [rest of file header depends on the number of flows and how many keys]
+        fmt = ">I4BQIIHHHB%is%is" % (self._number_of_flows_per_read, key_length)
+        # According to the spec, the header_length field should be the total
+        # number of bytes required by this set of header fields, and should be
+        # equal to "31 + number_of_flows_per_read + key_length" rounded up to
+        # the next value divisible by 8.
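+        # (For example, 400 flows and a 4 base key give 31 + 400 + 4 = 435,
+        # which is padded up to 440 bytes.)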
+        if struct.calcsize(fmt) % 8 == 0:
+            padding = 0
+        else:
+            padding = 8 - (struct.calcsize(fmt) % 8)
+        header_length = struct.calcsize(fmt) + padding
+        assert header_length % 8 == 0
+        header = struct.pack(
+            fmt,
+            779314790,  # magic number 0x2E736666
+            0,
+            0,
+            0,
+            1,  # version
+            self._index_start,
+            self._index_length,
+            self._number_of_reads,
+            header_length,
+            key_length,
+            self._number_of_flows_per_read,
+            1,  # the only flowgram format code we support
+            self._flow_chars,
+            self._key_sequence,
+        )
+        self.handle.write(header + _null * padding)
+
+    def write_record(self, record):
+        """Write a single additional record to the output file.
+
+        This assumes the file header has already been written.
+        """
+        # Basics
+        name = record.id.encode()
+        name_len = len(name)
+        seq = bytes(record.seq).upper()
+        seq_len = len(seq)
+        # Qualities
+        try:
+            quals = record.letter_annotations["phred_quality"]
+        except KeyError:
+            raise ValueError(
+                "Missing PHRED qualities information for %s" % record.id
+            ) from None
+        # Flow
+        try:
+            flow_values = record.annotations["flow_values"]
+            flow_index = record.annotations["flow_index"]
+            if (
+                self._key_sequence != record.annotations["flow_key"].encode()
+                or self._flow_chars != record.annotations["flow_chars"].encode()
+            ):
+                raise ValueError("Records have inconsistent SFF flow data")
+        except KeyError:
+            raise ValueError(
+                "Missing SFF flow information for %s" % record.id
+            ) from None
+        except AttributeError:
+            raise ValueError("Header not written yet?") from None
+        # Clipping
+        try:
+            clip_qual_left = record.annotations["clip_qual_left"]
+            if clip_qual_left < 0:
+                raise ValueError("Negative SFF clip_qual_left value for %s" % record.id)
+            if clip_qual_left:
+                clip_qual_left += 1
+            clip_qual_right = record.annotations["clip_qual_right"]
+            if clip_qual_right < 0:
+                raise ValueError(
+                    "Negative SFF clip_qual_right value for %s" % record.id
+                )
+            clip_adapter_left = record.annotations["clip_adapter_left"]
+            if clip_adapter_left < 0:
+                raise ValueError(
+                    "Negative SFF clip_adapter_left value for %s" % record.id
+                )
+            if clip_adapter_left:
+                clip_adapter_left += 1
+            clip_adapter_right = record.annotations["clip_adapter_right"]
+            if clip_adapter_right < 0:
+                raise ValueError(
+                    "Negative SFF clip_adapter_right value for %s" % record.id
+                )
+        except KeyError:
+            raise ValueError(
+                "Missing SFF clipping information for %s" % record.id
+            ) from None
+
+        # Capture information for index
+        if self._index is not None:
+            offset = self.handle.tell()
+            # Check the position of the final record (before sort by name)
+            # Using a four-digit base 255 number, so the upper bound is
+            # 254*(1)+254*(255)+254*(255**2)+254*(255**3) = 4228250624
+            # or equivalently it overflows at 255**4 = 4228250625
+            if offset > 4228250624:
+                import warnings
+
+                warnings.warn(
+                    "Read %s has file offset %i, which is too large "
+                    "to store in the Roche SFF index structure. No "
+                    "index block will be recorded." % (name, offset)
+                )
+                # No point recording the offsets now
+                self._index = None
+            else:
+                self._index.append((name, self.handle.tell()))
+
+        # the read header format (fixed part):
+        # read_header_length     H
+        # name_length            H
+        # seq_len                I
+        # clip_qual_left         H
+        # clip_qual_right        H
+        # clip_adapter_left      H
+        # clip_adapter_right     H
+        # [rest of read header depends on the name length etc]
+        # name
+        # flow values
+        # flow index
+        # sequence
+        # padding
+        read_header_fmt = ">2HI4H%is" % name_len
+        if struct.calcsize(read_header_fmt) % 8 == 0:
+            padding = 0
+        else:
+            padding = 8 - (struct.calcsize(read_header_fmt) % 8)
+        read_header_length = struct.calcsize(read_header_fmt) + padding
+        assert read_header_length % 8 == 0
+        data = (
+            struct.pack(
+                read_header_fmt,
+                read_header_length,
+                name_len,
+                seq_len,
+                clip_qual_left,
+                clip_qual_right,
+                clip_adapter_left,
+                clip_adapter_right,
+                name,
+            )
+            + _null * padding
+        )
+        assert len(data) == read_header_length
+        # now the flowgram values, flowgram index, bases and qualities
+        # NOTE - assuming flowgram_format==1, which means struct type H
+        read_flow_fmt = ">%iH" % self._number_of_flows_per_read
+        read_flow_size = struct.calcsize(read_flow_fmt)
+        temp_fmt = ">%iB" % seq_len  # used for flow index and quals
+        data += (
+            struct.pack(read_flow_fmt, *flow_values)
+            + struct.pack(temp_fmt, *flow_index)
+            + seq
+            + struct.pack(temp_fmt, *quals)
+        )
+        # now any final padding...
+        padding = (read_flow_size + seq_len * 3) % 8
+        if padding:
+            padding = 8 - padding
+        self.handle.write(data + _null * padding)
+
+
+if __name__ == "__main__":
+    from Bio._utils import run_doctest
+
+    run_doctest(verbose=0)
diff --git a/code/lib/Bio/SeqIO/SnapGeneIO.py b/code/lib/Bio/SeqIO/SnapGeneIO.py
new file mode 100644
index 0000000..5c670ab
--- /dev/null
+++ b/code/lib/Bio/SeqIO/SnapGeneIO.py
@@ -0,0 +1,296 @@
+# Copyright 2017-2019 Damien Goutte-Gattat.  All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Bio.SeqIO support for the SnapGene file format.
+
+The SnapGene binary format is the native format used by the SnapGene program
+from GSL Biotech LLC.
+"""
+from datetime import datetime
+from re import sub
+from struct import unpack
+from xml.dom.minidom import parseString
+
+from Bio.Seq import Seq
+from Bio.SeqFeature import FeatureLocation
+from Bio.SeqFeature import SeqFeature
+from Bio.SeqRecord import SeqRecord
+
+from .Interfaces import SequenceIterator
+
+
+def _iterate(handle):
+    """Iterate over the packets of a SnapGene file.
+
+    A SnapGene file is made of packets, each packet being a TLV-like
+    structure comprising:
+
+      - 1 single byte indicating the packet's type;
+      - 1 big-endian long integer (4 bytes) indicating the length of the
+        packet's data;
+      - the actual data.
+    """
+    while True:
+        packet_type = handle.read(1)
+        if len(packet_type) < 1:  # No more packets
+            return
+        packet_type = unpack(">B", packet_type)[0]
+
+        length = handle.read(4)
+        if len(length) < 4:
+            raise ValueError("Unexpected end of packet")
+        length = unpack(">I", length)[0]
+
+        data = handle.read(length)
+        if len(data) < length:
+            raise ValueError("Unexpected end of packet")
+
+        yield (packet_type, length, data)
+
+
+def _parse_dna_packet(length, data, record):
+    """Parse a DNA sequence packet.
+
+    A DNA sequence packet contains a single byte flag followed by the
+    sequence itself.
+    """
+    if record.seq:
+        raise ValueError("The file contains more than one DNA packet")
+
+    flags, sequence = unpack(">B%ds" % (length - 1), data)
+    record.seq = Seq(sequence.decode("ASCII"))
+    record.annotations["molecule_type"] = "DNA"
+    if flags & 0x01:
+        record.annotations["topology"] = "circular"
+    else:
+        record.annotations["topology"] = "linear"
+
+
+def _parse_notes_packet(length, data, record):
+    """Parse a 'Notes' packet.
+
+    This type of packet contains some metadata about the sequence, stored
+    as an XML string with a 'Notes' root node.
+    """
+    xml = parseString(data.decode("UTF-8"))
+    type = _get_child_value(xml, "Type")
+    if type == "Synthetic":
+        record.annotations["data_file_division"] = "SYN"
+    else:
+        record.annotations["data_file_division"] = "UNC"
+
+    date = _get_child_value(xml, "LastModified")
+    if date:
+        record.annotations["date"] = datetime.strptime(date, "%Y.%m.%d")
+
+    acc = _get_child_value(xml, "AccessionNumber")
+    if acc:
+        record.id = acc
+
+    comment = _get_child_value(xml, "Comments")
+    if comment:
+        record.name = comment.split(" ", 1)[0]
+        record.description = comment
+        if not acc:
+            record.id = record.name
+
+
+def _parse_cookie_packet(length, data, record):
+    """Parse a SnapGene cookie packet.
+
+    Every SnapGene file starts with a packet of this type. It acts as
+    a magic cookie identifying the file as a SnapGene file.
+    """
+    cookie, seq_type, exp_version, imp_version = unpack(">8sHHH", data)
+    if cookie.decode("ASCII") != "SnapGene":
+        raise ValueError("The file is not a valid SnapGene file")
+
+
+def _parse_location(rangespec, strand, record):
+    start, end = [int(x) for x in rangespec.split("-")]
+    # Account for SnapGene's 1-based coordinates
+    start = start - 1
+    if start > end:
+        # Range wrapping the end of the sequence
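+        # e.g. "950-100" on a 1000 bp circular record becomes the join
+        # of [949:1000] and [0:100]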
+        l1 = FeatureLocation(start, len(record), strand=strand)
+        l2 = FeatureLocation(0, end, strand=strand)
+        location = l1 + l2
+    else:
+        location = FeatureLocation(start, end, strand=strand)
+    return location
+
+
+def _parse_features_packet(length, data, record):
+    """Parse a sequence features packet.
+
+    This packet stores sequence features (except primer binding sites,
+    which are in a dedicated Primers packet). The data is an XML string
+    starting with a 'Features' root node.
+    """
+    xml = parseString(data.decode("UTF-8"))
+    for feature in xml.getElementsByTagName("Feature"):
+        quals = {}
+
+        type = _get_attribute_value(feature, "type", default="misc_feature")
+
+        strand = +1
+        directionality = int(
+            _get_attribute_value(feature, "directionality", default="1")
+        )
+        if directionality == 2:
+            strand = -1
+
+        location = None
+        for segment in feature.getElementsByTagName("Segment"):
+            rng = _get_attribute_value(segment, "range")
+            if not location:
+                location = _parse_location(rng, strand, record)
+            else:
+                location = location + _parse_location(rng, strand, record)
+        if not location:
+            raise ValueError("Missing feature location")
+
+        for qualifier in feature.getElementsByTagName("Q"):
+            qname = _get_attribute_value(
+                qualifier, "name", error="Missing qualifier name"
+            )
+            qvalues = []
+            for value in qualifier.getElementsByTagName("V"):
+                if value.hasAttribute("text"):
+                    qvalues.append(_decode(value.attributes["text"].value))
+                elif value.hasAttribute("predef"):
+                    qvalues.append(_decode(value.attributes["predef"].value))
+                elif value.hasAttribute("int"):
+                    qvalues.append(int(value.attributes["int"].value))
+            quals[qname] = qvalues
+
+        name = _get_attribute_value(feature, "name")
+        if name:
+            if "label" not in quals:
+                # No explicit label attribute, use the SnapGene name
+                quals["label"] = [name]
+            elif name not in quals["label"]:
+                # The SnapGene name is different from the label,
+                # add a specific attribute to represent it
+                quals["name"] = [name]
+
+        feature = SeqFeature(location, type=type, qualifiers=quals)
+        record.features.append(feature)
+
+
+def _parse_primers_packet(length, data, record):
+    """Parse a Primers packet.
+
+    A Primers packet is similar to a Features packet but specifically
+    stores primer binding features. The data is an XML string starting
+    with a 'Primers' root node.
+    """
+    xml = parseString(data.decode("UTF-8"))
+    for primer in xml.getElementsByTagName("Primer"):
+        quals = {}
+
+        name = _get_attribute_value(primer, "name")
+        if name:
+            quals["label"] = [name]
+
+        for site in primer.getElementsByTagName("BindingSite"):
+            rng = _get_attribute_value(
+                site, "location", error="Missing binding site location"
+            )
+            strand = int(_get_attribute_value(site, "boundStrand", default="0"))
+            if strand == 1:
+                strand = -1
+            else:
+                strand = +1
+
+            feature = SeqFeature(
+                _parse_location(rng, strand, record),
+                type="primer_bind",
+                qualifiers=quals,
+            )
+            record.features.append(feature)
+
+
+_packet_handlers = {
+    0x00: _parse_dna_packet,
+    0x05: _parse_primers_packet,
+    0x06: _parse_notes_packet,
+    0x0A: _parse_features_packet,
+}
+
+
+# Helper functions to process the XML data in
+# some of the segments
+
+
+def _decode(text):
+    # Get rid of HTML tags in some values
+    return sub("<[^>]+>", "", text)
+
+
+def _get_attribute_value(node, name, default=None, error=None):
+    if node.hasAttribute(name):
+        return _decode(node.attributes[name].value)
+    elif error:
+        raise ValueError(error)
+    else:
+        return default
+
+
+def _get_child_value(node, name, default=None, error=None):
+    children = node.getElementsByTagName(name)
+    if (
+        children
+        and children[0].childNodes
+        and children[0].firstChild.nodeType == node.TEXT_NODE
+    ):
+        return _decode(children[0].firstChild.data)
+    elif error:
+        raise ValueError(error)
+    else:
+        return default
+
+
+class SnapGeneIterator(SequenceIterator):
+    """Parser for SnapGene files."""
+
+    def __init__(self, source):
+        """Parse a SnapGene file and return a SeqRecord object.
+
+        Argument source is a file-like object or a path to a file.
+
+        Note that a SnapGene file can only contain one sequence, so this
+        iterator will always return a single record.
+        """
+        super().__init__(source, mode="b", fmt="SnapGene")
+
+    def parse(self, handle):
+        """Start parsing the file, and return a SeqRecord generator."""
+        records = self.iterate(handle)
+        return records
+
+    def iterate(self, handle):
+        """Iterate over the records in the SnapGene file."""
+        record = SeqRecord(None)
+        packets = _iterate(handle)
+        try:
+            packet_type, length, data = next(packets)
+        except StopIteration:
+            raise ValueError("Empty file.") from None
+
+        if packet_type != 0x09:
+            raise ValueError("The file does not start with a SnapGene cookie packet")
+        _parse_cookie_packet(length, data, record)
+
+        for (packet_type, length, data) in packets:
+            handler = _packet_handlers.get(packet_type)
+            if handler is not None:
+                handler(length, data, record)
+
+        if not record.seq:
+            raise ValueError("No DNA packet in file")
+
+        yield record
diff --git a/code/lib/Bio/SeqIO/SwissIO.py b/code/lib/Bio/SeqIO/SwissIO.py
new file mode 100644
index 0000000..8362451
--- /dev/null
+++ b/code/lib/Bio/SeqIO/SwissIO.py
@@ -0,0 +1,142 @@
+# Copyright 2006-2013,2020 by Peter Cock.
+# Revisions copyright 2008-2009 by Michiel de Hoon.
+# All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Bio.SeqIO support for the "swiss" (aka SwissProt/UniProt) file format.
+
+You are expected to use this module via the Bio.SeqIO functions.
+See also the Bio.SwissProt module which offers more than just accessing
+the sequences as SeqRecord objects.
+
+See also Bio.SeqIO.UniprotIO.py which supports the "uniprot-xml" format.
+"""
+from Bio import SeqFeature
+from Bio import SwissProt
+from Bio.Seq import Seq
+from Bio.SeqRecord import SeqRecord
+
+
+def _make_position(location_string, offset=0):
+    """Turn a Swiss location position into a SeqFeature position object (PRIVATE).
+
+    An offset of -1 is used with a start location to make it pythonic.
+    """
+    if location_string == "?":
+        return SeqFeature.UnknownPosition()
+    # Hack so that feature from 0 to 0 becomes 0 to 0, not -1 to 0.
+    try:
+        return SeqFeature.ExactPosition(max(0, offset + int(location_string)))
+    except ValueError:
+        pass
+    if location_string.startswith("<"):
+        try:
+            return SeqFeature.BeforePosition(max(0, offset + int(location_string[1:])))
+        except ValueError:
+            pass
+    elif location_string.startswith(">"):  # e.g. ">13"
+        try:
+            return SeqFeature.AfterPosition(max(0, offset + int(location_string[1:])))
+        except ValueError:
+            pass
+    elif location_string.startswith("?"):  # e.g. "?22"
+        try:
+            return SeqFeature.UncertainPosition(
+                max(0, offset + int(location_string[1:]))
+            )
+        except ValueError:
+            pass
+    raise NotImplementedError("Cannot parse location '%s'" % location_string)
+
+
+def SwissIterator(source):
+    """Break up a Swiss-Prot/UniProt file into SeqRecord objects.
+
+    Argument source is a file-like object or a path to a file.
+
+    Every section from the ID line to the terminating // becomes
+    a single SeqRecord with associated annotation and features.
+
+    This parser is for the flat file "swiss" format as used by:
+     - Swiss-Prot aka SwissProt
+     - TrEMBL
+     - UniProtKB aka UniProt Knowledgebase
+
+    For consistency with BioPerl and EMBOSS we call this the "swiss"
+    format. See also the SeqIO support for "uniprot-xml" format.
+
+    Rather than calling it directly, you are expected to use this
+    parser via Bio.SeqIO.parse(..., format="swiss") instead.
+    """
+    swiss_records = SwissProt.parse(source)
+
+    for swiss_record in swiss_records:
+        # Convert the SwissProt record to a SeqRecord
+        record = SeqRecord(
+            Seq(swiss_record.sequence),
+            id=swiss_record.accessions[0],
+            name=swiss_record.entry_name,
+            description=swiss_record.description,
+            features=swiss_record.features,
+        )
+        for cross_reference in swiss_record.cross_references:
+            if len(cross_reference) < 2:
+                continue
+            database, accession = cross_reference[:2]
+            dbxref = "%s:%s" % (database, accession)
+            if dbxref not in record.dbxrefs:
+                record.dbxrefs.append(dbxref)
+        annotations = record.annotations
+        annotations["molecule_type"] = "protein"
+        annotations["accessions"] = swiss_record.accessions
+        if swiss_record.protein_existence:
+            annotations["protein_existence"] = swiss_record.protein_existence
+        if swiss_record.created:
+            date, version = swiss_record.created
+            annotations["date"] = date
+            annotations["sequence_version"] = version
+        if swiss_record.sequence_update:
+            date, version = swiss_record.sequence_update
+            annotations["date_last_sequence_update"] = date
+            annotations["sequence_version"] = version
+        if swiss_record.annotation_update:
+            date, version = swiss_record.annotation_update
+            annotations["date_last_annotation_update"] = date
+            annotations["entry_version"] = version
+        if swiss_record.gene_name:
+            annotations["gene_name"] = swiss_record.gene_name
+        annotations["organism"] = swiss_record.organism.rstrip(".")
+        annotations["taxonomy"] = swiss_record.organism_classification
+        annotations["ncbi_taxid"] = swiss_record.taxonomy_id
+        if swiss_record.host_organism:
+            annotations["organism_host"] = swiss_record.host_organism
+        if swiss_record.host_taxonomy_id:
+            annotations["host_ncbi_taxid"] = swiss_record.host_taxonomy_id
+        if swiss_record.comments:
+            annotations["comment"] = "\n".join(swiss_record.comments)
+        if swiss_record.references:
+            annotations["references"] = []
+            for reference in swiss_record.references:
+                feature = SeqFeature.Reference()
+                feature.comment = " ".join("%s=%s;" % k_v for k_v in reference.comments)
+                for key, value in reference.references:
+                    if key == "PubMed":
+                        feature.pubmed_id = value
+                    elif key == "MEDLINE":
+                        feature.medline_id = value
+                    elif key == "DOI":
+                        pass
+                    elif key == "AGRICOLA":
+                        pass
+                    else:
+                        raise ValueError("Unknown key %s found in references" % key)
+                feature.authors = reference.authors
+                feature.title = reference.title
+                feature.journal = reference.location
+                annotations["references"].append(feature)
+        if swiss_record.keywords:
+            record.annotations["keywords"] = swiss_record.keywords
+        yield record
diff --git a/code/lib/Bio/SeqIO/TabIO.py b/code/lib/Bio/SeqIO/TabIO.py
new file mode 100644
index 0000000..2770d90
--- /dev/null
+++ b/code/lib/Bio/SeqIO/TabIO.py
@@ -0,0 +1,139 @@
+# Copyright 2008-2017,2020 by Peter Cock.  All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Bio.SeqIO support for the "tab" (simple tab separated) file format.
+
+You are expected to use this module via the Bio.SeqIO functions.
+
+The "tab" format is an ad-hoc plain text file format where each sequence is
+on one (long) line.  Each line contains the identifier/description, followed
+by a tab, followed by the sequence.  For example, consider the following
+short FASTA format file::
+
+    >ID123456 possible binding site?
+    CATCNAGATGACACTACGACTACGACTCAGACTAC
+    >ID123457 random sequence
+    ACACTACGACTACGACTCAGACTACAAN
+
+Apart from the descriptions, this can be represented in the simple two column
+tab separated format as follows::
+
+    ID123456(tab)CATCNAGATGACACTACGACTACGACTCAGACTAC
+    ID123457(tab)ACACTACGACTACGACTCAGACTACAAN
+
+When reading this file, "ID123456" or "ID123457" will be taken as the record's
+.id and .name properties.  There is no other information to record.
+
+Similarly, when writing to this format, Biopython will ONLY record the record's
+.id and .seq (and not the description or any other information) as in the
+example above.
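+
+For example, a minimal reading sketch via Bio.SeqIO (here "example.tab" is a
+hypothetical file in the two-column layout shown above)::
+
+    from Bio import SeqIO
+
+    for record in SeqIO.parse("example.tab", "tab"):
+        print("%s length %i" % (record.id, len(record)))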
+"""
+from Bio.Seq import Seq
+from Bio.SeqRecord import SeqRecord
+
+from .Interfaces import _clean
+from .Interfaces import _get_seq_string
+from .Interfaces import SequenceIterator
+from .Interfaces import SequenceWriter
+
+
+class TabIterator(SequenceIterator):
+    """Parser for tab-delimited files."""
+
+    def __init__(self, source):
+        """Iterate over tab separated lines as SeqRecord objects.
+
+        Each line of the file should contain one tab only, dividing the line
+        into an identifier and the full sequence.
+
+        Arguments:
+         - source - file-like object opened in text mode, or a path to a file
+
+        The first field is taken as the record's .id and .name (regardless of
+        any spaces within the text) and the second field is the sequence.
+
+        Any blank lines are ignored.
+
+        Examples
+        --------
+        >>> with open("GenBank/NC_005816.tsv") as handle:
+        ...     for record in TabIterator(handle):
+        ...         print("%s length %i" % (record.id, len(record)))
+        gi|45478712|ref|NP_995567.1| length 340
+        gi|45478713|ref|NP_995568.1| length 260
+        gi|45478714|ref|NP_995569.1| length 64
+        gi|45478715|ref|NP_995570.1| length 123
+        gi|45478716|ref|NP_995571.1| length 145
+        gi|45478717|ref|NP_995572.1| length 357
+        gi|45478718|ref|NP_995573.1| length 138
+        gi|45478719|ref|NP_995574.1| length 312
+        gi|45478720|ref|NP_995575.1| length 99
+        gi|45478721|ref|NP_995576.1| length 90
+
+        """
+        super().__init__(source, mode="t", fmt="Tab-separated plain-text")
+
+    def parse(self, handle):
+        """Start parsing the file, and return a SeqRecord generator."""
+        records = self.iterate(handle)
+        return records
+
+    def iterate(self, handle):
+        """Parse the file and generate SeqRecord objects."""
+        for line in handle:
+            try:
+                title, seq = line.split("\t")  # will fail if more than one tab!
+            except ValueError:
+                if line.strip() == "":
+                    # It's a blank line, ignore it
+                    continue
+                raise ValueError(
+                    "Each line should have one tab separating the"
+                    + " title and sequence, this line has %i tabs: %r"
+                    % (line.count("\t"), line)
+                ) from None
+            title = title.strip()
+            seq = seq.strip()  # removes the trailing new line
+            yield SeqRecord(Seq(seq), id=title, name=title, description="")
+
+
+class TabWriter(SequenceWriter):
+    """Class to write simple tab separated format files.
+
+    Each line consists of "id(tab)sequence" only.
+
+    Any description, name or other annotation is not recorded.
+
+    This class is not intended to be used directly. Instead, please use
+    the function ``as_tab``, or the top level ``Bio.SeqIO.write()`` function
+    with ``format="tab"``.
+    """
+
+    def write_record(self, record):
+        """Write a single tab line to the file."""
+        assert self._header_written
+        assert not self._footer_written
+        self._record_written = True
+        self.handle.write(as_tab(record))
+
+
+def as_tab(record):
+    """Return record as tab separated (id(tab)seq) string."""
+    title = _clean(record.id)
+    seq = _get_seq_string(record)  # Catches sequence being None
+    assert "\t" not in title
+    assert "\n" not in title
+    assert "\r" not in title
+    assert "\t" not in seq
+    assert "\n" not in seq
+    assert "\r" not in seq
+    return "%s\t%s\n" % (title, seq)
+
+
+if __name__ == "__main__":
+    from Bio._utils import run_doctest
+
+    run_doctest(verbose=0)
diff --git a/code/lib/Bio/SeqIO/TwoBitIO.py b/code/lib/Bio/SeqIO/TwoBitIO.py
new file mode 100644
index 0000000..4ad0775
--- /dev/null
+++ b/code/lib/Bio/SeqIO/TwoBitIO.py
@@ -0,0 +1,250 @@
+# Copyright 2020 by Michiel de Hoon
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Bio.SeqIO support for UCSC's "twoBit" (.2bit) file format.
+
+This parser reads the index stored in the twoBit file, as well as the masked
+regions and the N's for each sequence. It also creates sequence data objects
+(_TwoBitSequenceData objects), which support only two methods: __len__ and
+__getitem__. The former will return the length of the sequence, while the
+latter returns the sequence (as a bytes object) for the requested region.
+
+Using the information in the index, the __getitem__ method calculates the file
+position at which the requested region starts, and only reads the requested
+sequence region. Note that the full sequence of a record is loaded only if
+specifically requested, making the parser memory-efficient.
+
+The TwoBitIterator object implements the __getitem__, keys, and __len__
+methods that allow it to be used as a dictionary.
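+
+A usage sketch (here "genome.2bit" and "chr1" are hypothetical)::
+
+    from Bio.SeqIO.TwoBitIO import TwoBitIterator
+
+    with open("genome.2bit", "rb") as handle:
+        records = TwoBitIterator(handle)
+        record = records["chr1"]        # dictionary-style access by name
+        print(record.seq[1000:1010])    # only this region is read from disk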
+"""
+# The .2bit file format is defined by UCSC as follows
+# (see http://genome.ucsc.edu/FAQ/FAQformat.html#format7):
+#
+#
+# A .2bit file stores multiple DNA sequences (up to 4 Gb total) in a compact
+# randomly-accessible format. The file contains masking information as well
+# as the DNA itself.
+#
+# The file begins with a 16-byte header containing the following fields:
+#
+# signature - the number 0x1A412743 in the architecture of the machine that
+#             created the file
+# version - zero for now. Readers should abort if they see a version number
+#           higher than 0
+# sequenceCount - the number of sequences in the file
+# reserved - always zero for now
+#
+# All fields are 32 bits unless noted. If the signature value is not as
+# given, the reader program should byte-swap the signature and check if the
+# swapped version matches. If so, all multiple-byte entities in the file
+# will have to be byte-swapped. This enables these binary files to be used
+# unchanged on different architectures.
+#
+# The header is followed by a file index, which contains one entry for each
+# sequence. Each index entry contains three fields:
+#
+# nameSize - a byte containing the length of the name field
+# name - the sequence name itself (in ASCII-compatible byte string), of
+#        variable length depending on nameSize
+# offset - the 32-bit offset of the sequence data relative to the start of
+#          the file, not aligned to any 4-byte padding boundary
+#
+# The index is followed by the sequence records, which contain nine fields:
+#
+# dnaSize - number of bases of DNA in the sequence
+# nBlockCount - the number of blocks of Ns in the file (representing unknown
+#               sequence)
+# nBlockStarts - an array of length nBlockCount of 32 bit integers
+#                indicating the (0-based) starting position of a block of Ns
+# nBlockSizes - an array of length nBlockCount of 32 bit integers indicating
+#               the length of a block of Ns
+# maskBlockCount - the number of masked (lower-case) blocks
+# maskBlockStarts - an array of length maskBlockCount of 32 bit integers
+#                   indicating the (0-based) starting position of a masked block
+# maskBlockSizes - an array of length maskBlockCount of 32 bit integers
+#                  indicating the length of a masked block
+# reserved - always zero for now
+# packedDna - the DNA packed to two bits per base, represented as so:
+#             T - 00, C - 01, A - 10, G - 11. The first base is in the most
+#             significant 2-bit byte; the last base is in the least significant
+#             2 bits. For example, the sequence TCAG is represented as 00011011.
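+#
+# A minimal decoding sketch (illustrative only; the actual conversion is done
+# by the compiled _twoBitIO helper imported below):
+#
+#     TWOBIT_BASES = "TCAG"
+#     def unpack_byte(b):
+#         # Most significant pair of bits first.
+#         return "".join(TWOBIT_BASES[(b >> shift) & 3] for shift in (6, 4, 2, 0))
+#
+#     assert unpack_byte(0b00011011) == "TCAG"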
+import numpy
+
+from Bio.Seq import Seq
+from Bio.Seq import SequenceDataAbstractBaseClass
+from Bio.SeqRecord import SeqRecord
+
+from . import _twoBitIO
+from .Interfaces import SequenceIterator
+
+
+class _TwoBitSequenceData(SequenceDataAbstractBaseClass):
+    """Stores information needed to retrieve sequence data from a .2bit file (PRIVATE).
+
+    Objects of this class store the file position at which the sequence data
+    start, the sequence length, and the start and end position of unknown (N)
+    and masked (lowercase) letters in the sequence.
+
+    Only two methods are provided: __len__ and __getitem__. The former will
+    return the length of the sequence, while the latter returns the sequence
+    (as a bytes object) for the requested region. The full sequence of a record
+    is loaded only if explicitly requested.
+    """
+
+    __slots__ = ("stream", "offset", "length", "nBlocks", "maskBlocks")
+
+    def __init__(self, stream, offset, length):
+        """Initialize the file stream and file position of the sequence data."""
+        self.stream = stream
+        self.offset = offset
+        self.length = length
+        super().__init__()
+
+    def __getitem__(self, key):
+        length = self.length
+        if isinstance(key, slice):
+            start, end, step = key.indices(length)
+            size = len(range(start, end, step))
+            if size == 0:
+                return b""
+        else:
+            if key < 0:
+                key += length
+                if key < 0:
+                    raise IndexError("index out of range")
+            start = key
+            end = key + 1
+            step = 1
+            size = 1
+        byteStart = start // 4
+        byteEnd = (end + 3) // 4
+        byteSize = byteEnd - byteStart
+        stream = self.stream
+        try:
+            stream.seek(self.offset + byteStart)
+        except ValueError as exception:
+            if str(exception) == "seek of closed file":
+                raise ValueError("cannot retrieve sequence: file is closed") from None
+            raise
+        data = numpy.fromfile(stream, dtype="uint8", count=byteSize)
+        sequence = _twoBitIO.convert(
+            data, start, end, step, self.nBlocks, self.maskBlocks
+        )
+        if isinstance(key, slice):
+            return sequence
+        else:  # single nucleotide
+            return ord(sequence)
+
+    def __len__(self):
+        return self.length
+
+    def upper(self):
+        """Remove the sequence mask."""
+        data = _TwoBitSequenceData(self.stream, self.offset, self.length)
+        data.nBlocks = self.nBlocks[:, :]
+        data.maskBlocks = numpy.empty((0, 2), dtype="uint32")
+        return data
+
+    def lower(self):
+        """Extend the sequence mask to the full sequence."""
+        data = _TwoBitSequenceData(self.stream, self.offset, self.length)
+        data.nBlocks = self.nBlocks[:, :]
+        data.maskBlocks = numpy.array([[0, self.length]], dtype="uint32")
+        return data
+
+
+class TwoBitIterator(SequenceIterator):
+    """Parser for UCSC twoBit (.2bit) files."""
+
+    def __init__(self, source):
+        """Read the file index."""
+        super().__init__(source, mode="b", fmt="twoBit")
+        # wait to close the file until the TwoBitIterator goes out of scope:
+        self.should_close_stream = False
+        stream = self.stream
+        data = stream.read(4)
+        if not data:
+            raise ValueError("Empty file.")
+        byteorders = ("little", "big")
+        dtypes = ("<u4", ">u4")  # little- and big-endian unsigned 32-bit
+        for byteorder, dtype in zip(byteorders, dtypes):
+            signature = int.from_bytes(data, byteorder)
+            if signature == 0x1A412743:
+                break
+        else:
+            raise ValueError("Unknown signature")
+        self.byteorder = byteorder
+        data = stream.read(4)
+        version = int.from_bytes(data, byteorder, signed=False)
+        if version == 1:
+            raise ValueError(
+                "version-1 twoBit files with 64-bit offsets for index are currently not supported"
+            )
+        if version != 0:
+            raise ValueError("Found unexpected file version %u; aborting" % version)
+        data = stream.read(4)
+        sequenceCount = int.from_bytes(data, byteorder, signed=False)
+        data = stream.read(4)
+        reserved = int.from_bytes(data, byteorder, signed=False)
+        if reserved != 0:
+            raise ValueError("Found non-zero reserved field; aborting")
+        sequences = {}
+        for i in range(sequenceCount):
+            data = stream.read(1)
+            nameSize = int.from_bytes(data, byteorder, signed=False)
+            data = stream.read(nameSize)
+            name = data.decode("ASCII")
+            data = stream.read(4)
+            offset = int.from_bytes(data, byteorder, signed=False)
+            sequences[name] = (stream, offset)
+        self.sequences = sequences
+        for name, (stream, offset) in sequences.items():
+            stream.seek(offset)
+            data = stream.read(4)
+            dnaSize = int.from_bytes(data, byteorder, signed=False)
+            sequence = _TwoBitSequenceData(stream, offset, dnaSize)
+            data = stream.read(4)
+            nBlockCount = int.from_bytes(data, byteorder, signed=False)
+            nBlockStarts = numpy.fromfile(stream, dtype=dtype, count=nBlockCount)
+            nBlockSizes = numpy.fromfile(stream, dtype=dtype, count=nBlockCount)
+            sequence.nBlocks = numpy.empty((nBlockCount, 2), dtype="uint32")
+            sequence.nBlocks[:, 0] = nBlockStarts
+            sequence.nBlocks[:, 1] = nBlockStarts + nBlockSizes
+            data = stream.read(4)
+            maskBlockCount = int.from_bytes(data, byteorder, signed=False)
+            maskBlockStarts = numpy.fromfile(stream, dtype=dtype, count=maskBlockCount)
+            maskBlockSizes = numpy.fromfile(stream, dtype=dtype, count=maskBlockCount)
+            sequence.maskBlocks = numpy.empty((maskBlockCount, 2), dtype="uint32")
+            sequence.maskBlocks[:, 0] = maskBlockStarts
+            sequence.maskBlocks[:, 1] = maskBlockStarts + maskBlockSizes
+            data = stream.read(4)
+            reserved = int.from_bytes(data, byteorder, signed=False)
+            if reserved != 0:
+                raise ValueError("Found non-zero reserved field %u" % reserved)
+            sequence.offset = stream.tell()
+            sequences[name] = sequence
+
+    def parse(self, stream):
+        """Iterate over the sequences in the file."""
+        for name, sequence in self.sequences.items():
+            sequence = Seq(sequence)
+            record = SeqRecord(sequence, id=name)
+            yield record
+
+    def __getitem__(self, name):
+        try:
+            sequence = self.sequences[name]
+        except KeyError:
+            raise KeyError(name) from None
+        sequence = Seq(sequence)
+        return SeqRecord(sequence, id=name)
+
+    def keys(self):
+        """Return a list with the names of the sequences in the file."""
+        return self.sequences.keys()
+
+    def __len__(self):
+        return len(self.sequences)
diff --git a/code/lib/Bio/SeqIO/UniprotIO.py b/code/lib/Bio/SeqIO/UniprotIO.py
new file mode 100644
index 0000000..50b881d
--- /dev/null
+++ b/code/lib/Bio/SeqIO/UniprotIO.py
@@ -0,0 +1,561 @@
+# Copyright 2010 by Andrea Pierleoni
+# Revisions copyright 2010, 2016 by Peter Cock
+# All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Bio.SeqIO support for the "uniprot-xml" file format.
+
+See Also:
+http://www.uniprot.org
+
+The UniProt XML format essentially replaces the old plain text file format
+originally introduced by SwissProt ("swiss" format in Bio.SeqIO).
+
+"""
+from xml.etree import ElementTree
+from xml.parsers.expat import errors
+
+from Bio import SeqFeature
+from Bio.Seq import Seq
+from Bio.SeqRecord import SeqRecord
+
+
+NS = "{http://uniprot.org/uniprot}"
+REFERENCE_JOURNAL = "%(name)s %(volume)s:%(first)s-%(last)s(%(pub_date)s)"
+
+
+def UniprotIterator(source, alphabet=None, return_raw_comments=False):
+    """Iterate over UniProt XML as SeqRecord objects.
+
+    Parses one XML entry at a time from any UniProt XML file and returns
+    a SeqRecord for each iteration.
+
+    This generator can be used in Bio.SeqIO
+
+    Argument source is a file-like object or a path to a file.
+
+    Optional argument alphabet should not be used anymore.
+
+    return_raw_comments = True --> comment fields are returned as complete XML to allow further processing
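+
+    A usage sketch via the top-level API (here "uniprot.xml" is a
+    hypothetical UniProt XML file)::
+
+        from Bio import SeqIO
+
+        for record in SeqIO.parse("uniprot.xml", "uniprot-xml"):
+            print(record.id, record.description)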
+    """
+    if alphabet is not None:
+        raise ValueError("The alphabet argument is no longer supported")
+    try:
+        for event, elem in ElementTree.iterparse(source, events=("start", "end")):
+            if event == "end" and elem.tag == NS + "entry":
+                yield Parser(elem, return_raw_comments=return_raw_comments).parse()
+                elem.clear()
+    except ElementTree.ParseError as exception:
+        if errors.messages[exception.code] == errors.XML_ERROR_NO_ELEMENTS:
+            assert exception.position == (1, 0)  # line 1, column 0
+            raise ValueError("Empty file.") from None
+        else:
+            raise
+
+
+class Parser:
+    """Parse a UniProt XML entry to a SeqRecord.
+
+    Optional argument alphabet is no longer used.
+
+    return_raw_comments=True to get back the complete comment field in XML format
+    """
+
+    def __init__(self, elem, alphabet=None, return_raw_comments=False):
+        """Initialize the class."""
+        if alphabet is not None:
+            raise ValueError("The alphabet argument is no longer supported")
+        self.entry = elem
+        self.return_raw_comments = return_raw_comments
+
+    def parse(self):
+        """Parse the input."""
+        assert self.entry.tag == NS + "entry"
+
+        def append_to_annotations(key, value):
+            if key not in self.ParsedSeqRecord.annotations:
+                self.ParsedSeqRecord.annotations[key] = []
+            if value not in self.ParsedSeqRecord.annotations[key]:
+                self.ParsedSeqRecord.annotations[key].append(value)
+
+        def _parse_name(element):
+            self.ParsedSeqRecord.name = element.text
+            self.ParsedSeqRecord.dbxrefs.append(self.dbname + ":" + element.text)
+
+        def _parse_accession(element):
+            append_to_annotations(
+                "accessions", element.text
+            )  # to cope with SwissProt plain text parser
+            self.ParsedSeqRecord.dbxrefs.append(self.dbname + ":" + element.text)
+
+        def _parse_protein(element):
+            """Parse protein names (PRIVATE)."""
+            descr_set = False
+            for protein_element in element:
+                if protein_element.tag in [
+                    NS + "recommendedName",
+                    NS + "submittedName",
+                    NS + "alternativeName",
+                ]:  # recommendedName tags are parsed first
+                    # use protein fields for name and description
+                    for rec_name in protein_element:
+                        ann_key = "%s_%s" % (
+                            protein_element.tag.replace(NS, ""),
+                            rec_name.tag.replace(NS, ""),
+                        )
+                        append_to_annotations(ann_key, rec_name.text)
+                        if (rec_name.tag == NS + "fullName") and not descr_set:
+                            self.ParsedSeqRecord.description = rec_name.text
+                            descr_set = True
+                elif protein_element.tag == NS + "component":
+                    pass  # not parsed
+                elif protein_element.tag == NS + "domain":
+                    pass  # not parsed
+
+        def _parse_gene(element):
+            for genename_element in element:
+                if "type" in genename_element.attrib:
+                    ann_key = "gene_%s_%s" % (
+                        genename_element.tag.replace(NS, ""),
+                        genename_element.attrib["type"],
+                    )
+                    if genename_element.attrib["type"] == "primary":
+                        self.ParsedSeqRecord.annotations[
+                            ann_key
+                        ] = genename_element.text
+                    else:
+                        append_to_annotations(ann_key, genename_element.text)
+
+        def _parse_geneLocation(element):
+            append_to_annotations("geneLocation", element.attrib["type"])
+
+        def _parse_organism(element):
+            organism_name = com_name = sci_name = ""
+            for organism_element in element:
+                if organism_element.tag == NS + "name":
+                    if organism_element.text:
+                        if organism_element.attrib["type"] == "scientific":
+                            sci_name = organism_element.text
+                        elif organism_element.attrib["type"] == "common":
+                            com_name = organism_element.text
+                        else:
+                            # e.g. synonym
+                            append_to_annotations(
+                                "organism_name", organism_element.text
+                            )
+                elif organism_element.tag == NS + "dbReference":
+                    self.ParsedSeqRecord.dbxrefs.append(
+                        organism_element.attrib["type"]
+                        + ":"
+                        + organism_element.attrib["id"]
+                    )
+                elif organism_element.tag == NS + "lineage":
+                    for taxon_element in organism_element:
+                        if taxon_element.tag == NS + "taxon":
+                            append_to_annotations("taxonomy", taxon_element.text)
+            if sci_name and com_name:
+                organism_name = "%s (%s)" % (sci_name, com_name)
+            elif sci_name:
+                organism_name = sci_name
+            elif com_name:
+                organism_name = com_name
+            self.ParsedSeqRecord.annotations["organism"] = organism_name
+
+        def _parse_organismHost(element):
+            for organism_element in element:
+                if organism_element.tag == NS + "name":
+                    append_to_annotations("organism_host", organism_element.text)
+
+        def _parse_keyword(element):
+            append_to_annotations("keywords", element.text)
+
+        def _parse_comment(element):
+            """Parse comments (PRIVATE).
+
+            Comment fields are very heterogeneous; each type has its own (frequently changing) schema.
+            To store all the contained data, more complex data structures are needed, such as
+            annotated dictionaries. This is left to the end user, who can optionally set:
+
+            return_raw_comments=True
+
+            The original XML is returned in the annotation fields.
+
+            Available comment types as of December 2009:
+             - "allergen"
+             - "alternative products"
+             - "biotechnology"
+             - "biophysicochemical properties"
+             - "catalytic activity"
+             - "caution"
+             - "cofactor"
+             - "developmental stage"
+             - "disease"
+             - "domain"
+             - "disruption phenotype"
+             - "enzyme regulation"
+             - "function"
+             - "induction"
+             - "miscellaneous"
+             - "pathway"
+             - "pharmaceutical"
+             - "polymorphism"
+             - "PTM"
+             - "RNA editing"
+             - "similarity"
+             - "subcellular location"
+             - "sequence caution"
+             - "subunit"
+             - "tissue specificity"
+             - "toxic dose"
+             - "online information"
+             - "mass spectrometry"
+             - "interaction"
+
+            """
+            simple_comments = [
+                "allergen",
+                "biotechnology",
+                "biophysicochemical properties",
+                "catalytic activity",
+                "caution",
+                "cofactor",
+                "developmental stage",
+                "disease",
+                "domain",
+                "disruption phenotype",
+                "enzyme regulation",
+                "function",
+                "induction",
+                "miscellaneous",
+                "pathway",
+                "pharmaceutical",
+                "polymorphism",
+                "PTM",
+                "RNA editing",  # positions not parsed
+                "similarity",
+                "subunit",
+                "tissue specificity",
+                "toxic dose",
+            ]
+
+            if element.attrib["type"] in simple_comments:
+                ann_key = "comment_%s" % element.attrib["type"].replace(" ", "")
+                for text_element in element.iter(NS + "text"):
+                    if text_element.text:
+                        append_to_annotations(ann_key, text_element.text)
+            elif element.attrib["type"] == "subcellular location":
+                for subloc_element in element.iter(NS + "subcellularLocation"):
+                    for el in subloc_element:
+                        if el.text:
+                            ann_key = "comment_%s_%s" % (
+                                element.attrib["type"].replace(" ", ""),
+                                el.tag.replace(NS, ""),
+                            )
+                            append_to_annotations(ann_key, el.text)
+            elif element.attrib["type"] == "interaction":
+                for interact_element in element.iter(NS + "interactant"):
+                    ann_key = "comment_%s_intactId" % element.attrib["type"]
+                    append_to_annotations(ann_key, interact_element.attrib["intactId"])
+            elif element.attrib["type"] == "alternative products":
+                for alt_element in element.iter(NS + "isoform"):
+                    ann_key = "comment_%s_isoform" % element.attrib["type"].replace(
+                        " ", ""
+                    )
+                    for id_element in alt_element.iter(NS + "id"):
+                        append_to_annotations(ann_key, id_element.text)
+            elif element.attrib["type"] == "mass spectrometry":
+                ann_key = "comment_%s" % element.attrib["type"].replace(" ", "")
+                start = end = 0
+                for el in element.iter(NS + "location"):
+                    pos_els = list(el.iter(NS + "position"))
+                    # This try/except should ideally be avoided; it may be safer
+                    # to skip position parsing for mass spectrometry entirely.
+                    try:
+                        if pos_els:
+                            end = int(pos_els[0].attrib["position"])
+                            start = end - 1
+                        else:
+                            start = int(next(el.iter(NS + "begin")).attrib["position"])
+                            start -= 1
+                            end = int(next(el.iter(NS + "end")).attrib["position"])
+                    except (ValueError, KeyError):
+                        # undefined positions or erroneously mapped
+                        pass
+                mass = element.attrib["mass"]
+                method = element.attrib["method"]
+                if start == end == 0:
+                    append_to_annotations(ann_key, "undefined:%s|%s" % (mass, method))
+                else:
+                    append_to_annotations(
+                        ann_key, "%s..%s:%s|%s" % (start, end, mass, method)
+                    )
+            elif element.attrib["type"] == "sequence caution":
+                pass  # not parsed: few information, complex structure
+            elif element.attrib["type"] == "online information":
+                for link_element in element.iter(NS + "link"):
+                    ann_key = "comment_%s" % element.attrib["type"].replace(" ", "")
+                    for id_element in link_element.iter(NS + "link"):
+                        append_to_annotations(
+                            ann_key,
+                            "%s@%s"
+                            % (element.attrib["name"], link_element.attrib["uri"]),
+                        )
+
+            # return raw XML comments if needed
+            if self.return_raw_comments:
+                ann_key = "comment_%s_xml" % element.attrib["type"].replace(" ", "")
+                append_to_annotations(ann_key, ElementTree.tostring(element))
+
+        def _parse_dbReference(element):
+            self.ParsedSeqRecord.dbxrefs.append(
+                element.attrib["type"] + ":" + element.attrib["id"]
+            )
+            # e.g.
+            # <dbReference type="PDB" key="11" id="2GEZ">
+            #   <property value="X-ray" type="method"/>
+            #   <property value="2.60 A" type="resolution"/>
+            #   <property value="A/B/C/D=1-200" type="chains"/>
+            # </dbReference>
+            if "type" in element.attrib:
+                if element.attrib["type"] == "PDB":
+                    method = ""
+                    resolution = ""
+                    for ref_element in element:
+                        if ref_element.tag == NS + "property":
+                            dat_type = ref_element.attrib["type"]
+                            if dat_type == "method":
+                                method = ref_element.attrib["value"]
+                            if dat_type == "resolution":
+                                resolution = ref_element.attrib["value"]
+                            if dat_type == "chains":
+                                pairs = ref_element.attrib["value"].split(",")
+                                for elem in pairs:
+                                    pair = elem.strip().split("=")
+                                    if pair[1] != "-":
+                                        # TODO - How best to store these, do SeqFeatures make sense?
+                                        feature = SeqFeature.SeqFeature()
+                                        feature.type = element.attrib["type"]
+                                        feature.qualifiers["name"] = element.attrib[
+                                            "id"
+                                        ]
+                                        feature.qualifiers["method"] = method
+                                        feature.qualifiers["resolution"] = resolution
+                                        feature.qualifiers["chains"] = pair[0].split(
+                                            "/"
+                                        )
+                                        start = int(pair[1].split("-")[0]) - 1
+                                        end = int(pair[1].split("-")[1])
+                                        feature.location = SeqFeature.FeatureLocation(
+                                            start, end
+                                        )
+                                        # self.ParsedSeqRecord.features.append(feature)
+
+            for ref_element in element:
+                if ref_element.tag == NS + "property":
+                    pass  # This data cannot be fitted into a SeqRecord object as a simple list; however, at least Ensembl and EMBL parsing could be improved to add entries to dbxrefs.
+
+        def _parse_reference(element):
+            reference = SeqFeature.Reference()
+            authors = []
+            scopes = []
+            tissues = []
+            journal_name = ""
+            pub_type = ""
+            pub_date = ""
+            for ref_element in element:
+                if ref_element.tag == NS + "citation":
+                    pub_type = ref_element.attrib["type"]
+                    if pub_type == "submission":
+                        pub_type += " to the " + ref_element.attrib["db"]
+                    if "name" in ref_element.attrib:
+                        journal_name = ref_element.attrib["name"]
+                    pub_date = ref_element.attrib.get("date", "")
+                    j_volume = ref_element.attrib.get("volume", "")
+                    j_first = ref_element.attrib.get("first", "")
+                    j_last = ref_element.attrib.get("last", "")
+                    for cit_element in ref_element:
+                        if cit_element.tag == NS + "title":
+                            reference.title = cit_element.text
+                        elif cit_element.tag == NS + "authorList":
+                            for person_element in cit_element:
+                                authors.append(person_element.attrib["name"])
+                        elif cit_element.tag == NS + "dbReference":
+                            self.ParsedSeqRecord.dbxrefs.append(
+                                cit_element.attrib["type"]
+                                + ":"
+                                + cit_element.attrib["id"]
+                            )
+                            if cit_element.attrib["type"] == "PubMed":
+                                reference.pubmed_id = cit_element.attrib["id"]
+                            elif ref_element.attrib["type"] == "MEDLINE":
+                                reference.medline_id = cit_element.attrib["id"]
+                elif ref_element.tag == NS + "scope":
+                    scopes.append(ref_element.text)
+                elif ref_element.tag == NS + "source":
+                    for source_element in ref_element:
+                        if source_element.tag == NS + "tissue":
+                            tissues.append(source_element.text)
+            if scopes:
+                scopes_str = "Scope: " + ", ".join(scopes)
+            else:
+                scopes_str = ""
+            if tissues:
+                tissues_str = "Tissue: " + ", ".join(tissues)
+            else:
+                tissues_str = ""
+
+            # locations cannot be parsed since they are actually written in
+            # free text inside scopes so all the references are put in the
+            # annotation.
+            reference.location = []
+            reference.authors = ", ".join(authors)
+            if journal_name:
+                if pub_date and j_volume and j_first and j_last:
+                    reference.journal = REFERENCE_JOURNAL % {
+                        "name": journal_name,
+                        "volume": j_volume,
+                        "first": j_first,
+                        "last": j_last,
+                        "pub_date": pub_date,
+                    }
+                else:
+                    reference.journal = journal_name
+            reference.comment = " | ".join(
+                (pub_type, pub_date, scopes_str, tissues_str)
+            )
+            append_to_annotations("references", reference)
+
+        def _parse_position(element, offset=0):
+            try:
+                position = int(element.attrib["position"]) + offset
+            except KeyError:
+                position = None
+            status = element.attrib.get("status", "")
+            if status == "unknown":
+                assert position is None
+                return SeqFeature.UnknownPosition()
+            elif not status:
+                return SeqFeature.ExactPosition(position)
+            elif status == "greater than":
+                return SeqFeature.AfterPosition(position)
+            elif status == "less than":
+                return SeqFeature.BeforePosition(position)
+            elif status == "uncertain":
+                return SeqFeature.UncertainPosition(position)
+            else:
+                raise NotImplementedError("Position status %r" % status)
+
+        def _parse_feature(element):
+            feature = SeqFeature.SeqFeature()
+            for k, v in element.attrib.items():
+                feature.qualifiers[k] = v
+            feature.type = element.attrib.get("type", "")
+            if "id" in element.attrib:
+                feature.id = element.attrib["id"]
+            for feature_element in element:
+                if feature_element.tag == NS + "location":
+                    position_elements = feature_element.findall(NS + "position")
+                    if position_elements:
+                        element = position_elements[0]
+                        start_position = _parse_position(element, -1)
+                        end_position = _parse_position(element)
+                    else:
+                        element = feature_element.findall(NS + "begin")[0]
+                        start_position = _parse_position(element, -1)
+                        element = feature_element.findall(NS + "end")[0]
+                        end_position = _parse_position(element)
+                    feature.location = SeqFeature.FeatureLocation(
+                        start_position, end_position
+                    )
+                else:
+                    try:
+                        feature.qualifiers[
+                            feature_element.tag.replace(NS, "")
+                        ] = feature_element.text
+                    except Exception:  # TODO - Which exceptions?
+                        pass  # skip unparsable tag
+            self.ParsedSeqRecord.features.append(feature)
+
+        def _parse_proteinExistence(element):
+            append_to_annotations("proteinExistence", element.attrib["type"])
+
+        def _parse_evidence(element):
+            for k, v in element.attrib.items():
+                ann_key = k
+                append_to_annotations(ann_key, v)
+
+        def _parse_sequence(element):
+            for k, v in element.attrib.items():
+                if k in ("length", "mass", "version"):
+                    self.ParsedSeqRecord.annotations["sequence_%s" % k] = int(v)
+                else:
+                    self.ParsedSeqRecord.annotations["sequence_%s" % k] = v
+            self.ParsedSeqRecord.seq = Seq("".join(element.text.split()))
+            self.ParsedSeqRecord.annotations["molecule_type"] = "protein"
+
+        # ============================================#
+        # Initialize SeqRecord
+        self.ParsedSeqRecord = SeqRecord("", id="")
+
+        # Entry attribs parsing
+        # Unknown dataset should not happen!
+        self.dbname = self.entry.attrib.get("dataset", "UnknownDataset")
+        # add attribs to annotations
+        for k, v in self.entry.attrib.items():
+            if k == "version":
+                # original
+                # self.ParsedSeqRecord.annotations["entry_%s" % k] = int(v)
+                # To cope with the SwissProt plain text parser. This can cause
+                # errors if the attrib has the same name as another annotation.
+                self.ParsedSeqRecord.annotations[k] = int(v)
+            else:
+                # self.ParsedSeqRecord.annotations["entry_%s" % k] = v
+                # to cope with swissProt plain text parser:
+                self.ParsedSeqRecord.annotations[k] = v
+
+        # Top-to-bottom entry children parsing
+        for element in self.entry:
+            if element.tag == NS + "name":
+                _parse_name(element)
+            elif element.tag == NS + "accession":
+                _parse_accession(element)
+            elif element.tag == NS + "protein":
+                _parse_protein(element)
+            elif element.tag == NS + "gene":
+                _parse_gene(element)
+            elif element.tag == NS + "geneLocation":
+                _parse_geneLocation(element)
+            elif element.tag == NS + "organism":
+                _parse_organism(element)
+            elif element.tag == NS + "organismHost":
+                _parse_organismHost(element)
+            elif element.tag == NS + "keyword":
+                _parse_keyword(element)
+            elif element.tag == NS + "comment":
+                _parse_comment(element)
+            elif element.tag == NS + "dbReference":
+                _parse_dbReference(element)
+            elif element.tag == NS + "reference":
+                _parse_reference(element)
+            elif element.tag == NS + "feature":
+                _parse_feature(element)
+            elif element.tag == NS + "proteinExistence":
+                _parse_proteinExistence(element)
+            elif element.tag == NS + "evidence":
+                _parse_evidence(element)
+            elif element.tag == NS + "sequence":
+                _parse_sequence(element)
+            else:
+                pass
+
+        # remove duplicate dbxrefs
+        self.ParsedSeqRecord.dbxrefs = sorted(set(self.ParsedSeqRecord.dbxrefs))
+
+        # use first accession as id
+        if not self.ParsedSeqRecord.id:
+            self.ParsedSeqRecord.id = self.ParsedSeqRecord.annotations["accessions"][0]
+
+        return self.ParsedSeqRecord
diff --git a/code/lib/Bio/SeqIO/XdnaIO.py b/code/lib/Bio/SeqIO/XdnaIO.py
new file mode 100644
index 0000000..74ade5e
--- /dev/null
+++ b/code/lib/Bio/SeqIO/XdnaIO.py
@@ -0,0 +1,366 @@
+# Copyright 2017-2019 Damien Goutte-Gattat.  All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Bio.SeqIO support for the "xdna" file format.
+
+The Xdna binary format is generated by Christian Marck's DNA Strider program
+and also used by Serial Cloner.
+"""
+import warnings
+
+from re import match
+from struct import pack
+from struct import unpack
+
+from Bio import BiopythonWarning
+from Bio.Seq import Seq
+from Bio.SeqFeature import ExactPosition
+from Bio.SeqFeature import FeatureLocation
+from Bio.SeqFeature import SeqFeature
+from Bio.SeqRecord import SeqRecord
+
+from .Interfaces import SequenceIterator
+from .Interfaces import SequenceWriter
+
+
+_seq_types = {
+    0: None,
+    1: "DNA",
+    2: "DNA",
+    3: "RNA",
+    4: "protein",
+}
+
+_seq_topologies = {0: "linear", 1: "circular"}
+
+
+def _read(handle, length):
+    """Read the specified number of bytes from the given handle."""
+    data = handle.read(length)
+    if len(data) < length:
+        raise ValueError("Cannot read %d bytes from handle" % length)
+    return data
+
+
+def _read_pstring(handle):
+    """Read a Pascal string.
+
+    A Pascal string comprises a single byte giving the length of the string
+    followed by as many bytes.
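+
+    For example, the bytes b"\x02AB" encode the two-character string "AB".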
+    """
+    length = unpack(">B", _read(handle, 1))[0]
+    return unpack("%ds" % length, _read(handle, length))[0].decode("ASCII")
+
+
+def _read_pstring_as_integer(handle):
+    return int(_read_pstring(handle))
+
+
+def _read_overhang(handle):
+    """Read an overhang specification.
+
+    An overhang is represented in an XDNA file as:
+      - a Pascal string containing the text representation of the overhang
+        length, which also indicates the nature of the overhang:
+        - a length of zero means no overhang,
+        - a negative length means a 3' overhang,
+        - a positive length means a 5' overhang;
+      - the actual overhang sequence.
+
+    Examples:
+      - 0x01 0x30: no overhang ("0", as a P-string)
+      - 0x01 0x32 0x41 0x41: 5' AA overhang (P-string "2", then "AA")
+      - 0x02 0x2D 0x31 0x43: 3' C overhang (P-string "-1", then "C")
+
+    Returns a tuple (length, sequence).
+
+    """
+    length = _read_pstring_as_integer(handle)
+    if length != 0:
+        overhang = _read(handle, abs(length))
+        return (length, overhang)
+    else:
+        return (None, None)
+
+
+def _parse_feature_description(desc, qualifiers):
+    """Parse the description field of a Xdna feature.
+
+    The 'description' field of a feature sometimes contains several
+    GenBank-like qualifiers, separated by carriage returns (CR, 0x0D).
+    """
+    # Split the field's value in CR-separated lines, skipping empty lines
+    for line in [x for x in desc.split("\x0D") if len(x) > 0]:
+        # Is it a qualifier="value" line?
+        m = match('^([^=]+)="([^"]+)"?$', line)
+        if m:
+            # Store the qualifier as provided
+            qual, value = m.groups()
+            qualifiers[qual] = [value]
+        elif '"' not in line:  # Reject ill-formed qualifiers
+            # Store the entire line as a generic note qualifier
+            qualifiers["note"] = [line]
+
+
+def _read_feature(handle, record):
+    """Read a single sequence feature."""
+    name = _read_pstring(handle)
+    desc = _read_pstring(handle)
+    type = _read_pstring(handle) or "misc_feature"
+    start = _read_pstring_as_integer(handle)
+    end = _read_pstring_as_integer(handle)
+
+    # Feature flags (4 bytes):
+    # byte 1 is the strand (0: reverse strand, 1: forward strand);
+    # byte 2 tells whether to display the feature;
+    # byte 4 tells whether to draw an arrow when displaying the feature;
+    # meaning of byte 3 is unknown.
+    (forward, display, arrow) = unpack(">BBxB", _read(handle, 4))
+    if forward:
+        strand = 1
+    else:
+        strand = -1
+        start, end = end, start
+
+    # The last field is a Pascal string usually containing a
+    # comma-separated triplet of numbers ranging from 0 to 255.
+    # I suspect this represents the RGB color to use when displaying
+    # the feature. Skip it as we have no need for it.
+    _read_pstring(handle)
+
+    # Assemble the feature
+    # Shift start by -1 as XDNA feature coordinates are 1-based
+    # while Biopython uses 0-based counting.
+    location = FeatureLocation(start - 1, end, strand=strand)
+    qualifiers = {}
+    if name:
+        qualifiers["label"] = [name]
+    _parse_feature_description(desc, qualifiers)
+    feature = SeqFeature(location, type=type, qualifiers=qualifiers)
+    record.features.append(feature)
+
+
+class XdnaIterator(SequenceIterator):
+    """Parser for Xdna files."""
+
+    def __init__(self, source):
+        """Parse a Xdna file and return a SeqRecord object.
+
+        Argument source is a file-like object in binary mode or a path to a file.
+
+        Note that this is an "iterator" in name only since an Xdna file always
+        contains a single sequence.
+
+        """
+        super().__init__(source, mode="b", fmt="Xdna")
+
+    def parse(self, handle):
+        """Start parsing the file, and return a SeqRecord generator."""
+        # Parse fixed-size header and do some rudimentary checks
+        #
+        # The "neg_length" value is the length of the part of the sequence
+        # before the nucleotide considered as the "origin" (nucleotide number 1,
+        # which in DNA Strider is not always the first nucleotide).
+        # Biopython's SeqRecord has no such concept of a sequence origin as far
+        # as I know, so we ignore that value. SerialCloner has no such concept
+        # either and always generates files with a neg_length of zero.
+        header = handle.read(112)
+        if not header:
+            raise ValueError("Empty file.")
+        if len(header) < 112:
+            raise ValueError("Improper header, cannot read 112 bytes from handle")
+        records = self.iterate(handle, header)
+        return records
+
+    def iterate(self, handle, header):
+        """Parse the file and generate SeqRecord objects."""
+        (version, seq_type, topology, length, neg_length, com_length) = unpack(
+            ">BBB25xII60xI12x", header
+        )
+        if version != 0:
+            raise ValueError("Unsupported XDNA version")
+        if seq_type not in _seq_types:
+            raise ValueError("Unknown sequence type")
+        # Read actual sequence and comment found in all XDNA files
+        sequence = _read(handle, length).decode("ASCII")
+        comment = _read(handle, com_length).decode("ASCII")
+
+        # Try to derive a name from the first "word" of the comment
+        name = comment.split(" ")[0]
+
+        # Create record object
+        record = SeqRecord(Seq(sequence), description=comment, name=name, id=name)
+        if _seq_types[seq_type]:
+            record.annotations["molecule_type"] = _seq_types[seq_type]
+
+        if topology in _seq_topologies:
+            record.annotations["topology"] = _seq_topologies[topology]
+
+        if len(handle.read(1)) == 1:
+            # This is an XDNA file with an optional annotation section.
+
+            # Skip the overhangs as I don't know how to represent
+            # them in the SeqRecord model.
+            _read_overhang(handle)  # right-side overhang
+            _read_overhang(handle)  # left-side overhang
+
+            # Read the features
+            num_features = unpack(">B", _read(handle, 1))[0]
+            while num_features > 0:
+                _read_feature(handle, record)
+                num_features -= 1
+
+        yield record
+
+
+class XdnaWriter(SequenceWriter):
+    """Write files in the Xdna format."""
+
+    def __init__(self, target):
+        """Initialize an Xdna writer object.
+
+        Arguments:
+         - target - Output stream opened in binary mode, or a path to a file.
+
+        """
+        super().__init__(target, mode="wb")
+
+    def write_file(self, records):
+        """Write the specified record to a Xdna file.
+
+        Note that the function expects a list (or iterable) of records
+        as per the SequenceWriter interface, but the list should contain
+        only one record as the Xdna format is a mono-record format.
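+
+        A usage sketch via the top-level API (here record is an existing
+        SeqRecord and "out.xdna" a hypothetical output path)::
+
+            from Bio import SeqIO
+
+            SeqIO.write([record], "out.xdna", "xdna")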
+        """
+        records = iter(records)
+
+        try:
+            record = next(records)
+        except StopIteration:
+            raise ValueError("Must have one sequence") from None
+
+        try:
+            next(records)
+            raise ValueError("More than one sequence found")
+        except StopIteration:
+            pass
+
+        self._has_truncated_strings = False
+
+        molecule_type = record.annotations.get("molecule_type")
+        if molecule_type is None:
+            seqtype = 0
+        elif "DNA" in molecule_type:
+            seqtype = 1
+        elif "RNA" in molecule_type:
+            seqtype = 3
+        elif "protein" in molecule_type:
+            seqtype = 4
+        else:
+            seqtype = 0
+
+        if record.annotations.get("topology", "linear") == "circular":
+            topology = 1
+        else:
+            topology = 0
+
+        # We store the record's id and description in the comment field.
+        # Make sure to avoid duplicating the id if it is already
+        # contained in the description.
+        if record.description.startswith(record.id):
+            comment = record.description
+        else:
+            comment = f"{record.id} {record.description}"
+
+        # Write header
+        self.handle.write(
+            pack(
+                ">BBB25xII60xI11xB",
+                0,  # version
+                seqtype,
+                topology,
+                len(record),
+                0,  # negative length
+                len(comment),
+                255,  # end of header
+            )
+        )
+
+        # Actual sequence and comment
+        self.handle.write(bytes(record.seq))
+        self.handle.write(comment.encode("ASCII"))
+
+        self.handle.write(pack(">B", 0))  # Annotation section marker
+        self._write_pstring("0")  # right-side overhang
+        self._write_pstring("0")  # left-side overhand
+
+        # Write features
+        # We must skip features with fuzzy locations as they cannot be
+        # represented in the Xdna format
+        features = [
+            f
+            for f in record.features
+            if type(f.location.start) == ExactPosition
+            and type(f.location.end) == ExactPosition
+        ]
+        drop = len(record.features) - len(features)
+        if drop > 0:
+            warnings.warn(
+                f"Dropping {drop} features with fuzzy locations", BiopythonWarning
+            )
+
+        # We also cannot store more than 255 features as the number of
+        # features is stored on a single byte...
+        if len(features) > 255:
+            drop = len(features) - 255
+            warnings.warn(
+                f"Too many features, dropping the last {drop}", BiopythonWarning
+            )
+            features = features[:255]
+
+        self.handle.write(pack(">B", len(features)))
+        for feature in features:
+            self._write_pstring(feature.qualifiers.get("label", [""])[0])
+
+            description = ""
+            for qname in feature.qualifiers:
+                if qname in ("label", "translation"):
+                    continue
+
+                for val in feature.qualifiers[qname]:
+                    if len(description) > 0:
+                        description = description + "\x0D"
+                    description = description + '%s="%s"' % (qname, val)
+            self._write_pstring(description)
+
+            self._write_pstring(feature.type)
+
+            start = feature.location.start.position + 1  # 1-based coordinates
+            end = feature.location.end.position
+            strand = 1
+            if feature.location.strand == -1:
+                start, end = end, start
+                strand = 0
+            self._write_pstring(str(start))
+            self._write_pstring(str(end))
+
+            self.handle.write(pack(">BBBB", strand, 1, 0, 1))
+            self._write_pstring("127,127,127")
+
+        if self._has_truncated_strings:
+            warnings.warn(
+                "Some annotations were truncated to 255 characters", BiopythonWarning
+            )
+
+        return 1
+
+    def _write_pstring(self, s):
+        """Write the given string as a Pascal string."""
+        if len(s) > 255:
+            self._has_truncated_strings = True
+            s = s[:255]
+        self.handle.write(pack(">B", len(s)))
+        self.handle.write(s.encode("ASCII"))
diff --git a/code/lib/Bio/SeqIO/__init__.py b/code/lib/Bio/SeqIO/__init__.py
new file mode 100644
index 0000000..7872ffe
--- /dev/null
+++ b/code/lib/Bio/SeqIO/__init__.py
@@ -0,0 +1,1092 @@
+# Copyright 2006-2018 by Peter Cock.  All rights reserved.
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+r"""Sequence input/output as SeqRecord objects.
+
+Bio.SeqIO is also documented at SeqIO_ and by a whole chapter in our tutorial:
+
+  - `HTML Tutorial`_
+  - `PDF Tutorial`_
+
+.. _SeqIO: http://biopython.org/wiki/SeqIO
+.. _`HTML Tutorial`: http://biopython.org/DIST/docs/tutorial/Tutorial.html
+.. _`PDF Tutorial`: http://biopython.org/DIST/docs/tutorial/Tutorial.pdf
+
+Input
+-----
+The main function is Bio.SeqIO.parse(...) which takes an input file handle
+(or, in recent versions of Biopython, alternatively a filename as a string)
+and a format string.  This returns an iterator giving SeqRecord objects:
+
+>>> from Bio import SeqIO
+>>> for record in SeqIO.parse("Fasta/f002", "fasta"):
+...     print("%s %i" % (record.id, len(record)))
+gi|1348912|gb|G26680|G26680 633
+gi|1348917|gb|G26685|G26685 413
+gi|1592936|gb|G29385|G29385 471
+
+Note that the parse() function will invoke the relevant parser for the
+format with its default settings.  You may want more control, in which case
+you need to create a format specific sequence iterator directly.
+
+Some of these parsers are wrappers around low-level parsers which build up
+SeqRecord objects for the consistent SeqIO interface. In cases where the
+run-time is critical, such as large FASTA or FASTQ files, calling these
+underlying parsers directly will be much faster; these generator functions
+return tuples of strings rather than SeqRecord objects:
+
+>>> from Bio.SeqIO.FastaIO import SimpleFastaParser
+>>> from Bio.SeqIO.QualityIO import FastqGeneralIterator
+
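+For example, a minimal sketch using SimpleFastaParser directly (the
+filename here is illustrative)::
+
+    with open("example.fasta") as handle:
+        for title, sequence in SimpleFastaParser(handle):
+            print(title, len(sequence))
+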
+
+Input - Single Records
+----------------------
+If you expect your file to contain one-and-only-one record, then we provide
+the following 'helper' function which will return a single SeqRecord, or
+raise an exception if there are no records or more than one record:
+
+>>> from Bio import SeqIO
+>>> record = SeqIO.read("Fasta/f001", "fasta")
+>>> print("%s %i" % (record.id, len(record)))
+gi|3318709|pdb|1A91| 79
+
+This style is useful when you expect a single record only (and would
+consider multiple records an error).  For example, when dealing with GenBank
+files for bacterial genomes or chromosomes, there is normally only a single
+record.  Alternatively, use this with a handle when downloading a single
+record from the internet.
+
+However, if you just want the first record from a file containing multiple
+records, use the next() function on the iterator:
+
+>>> from Bio import SeqIO
+>>> record = next(SeqIO.parse("Fasta/f002", "fasta"))
+>>> print("%s %i" % (record.id, len(record)))
+gi|1348912|gb|G26680|G26680 633
+
+The above code will work as long as the file contains at least one record.
+Note that if there is more than one record, the remaining records will be
+silently ignored.
+
+
+Input - Multiple Records
+------------------------
+For non-interlaced files (e.g. Fasta, GenBank, EMBL) with multiple records
+using a sequence iterator can save you a lot of memory (RAM).  There is
+less benefit for interlaced file formats (e.g. most multiple alignment file
+formats).  However, an iterator only lets you access the records one by one.
+
+If you want random access to the records by number, turn this into a list:
+
+>>> from Bio import SeqIO
+>>> records = list(SeqIO.parse("Fasta/f002", "fasta"))
+>>> len(records)
+3
+>>> print(records[1].id)
+gi|1348917|gb|G26685|G26685
+
+If you want random access to the records by a key such as the record id,
+turn the iterator into a dictionary:
+
+>>> from Bio import SeqIO
+>>> record_dict = SeqIO.to_dict(SeqIO.parse("Fasta/f002", "fasta"))
+>>> len(record_dict)
+3
+>>> print(len(record_dict["gi|1348917|gb|G26685|G26685"]))
+413
+
+However, using list() or the to_dict() function will load all the records
+into memory at once, and is therefore not practical for very large files.
+Instead, for *some* file formats Bio.SeqIO provides an indexing approach
+providing dictionary like access to any record. For example,
+
+>>> from Bio import SeqIO
+>>> record_dict = SeqIO.index("Fasta/f002", "fasta")
+>>> len(record_dict)
+3
+>>> print(len(record_dict["gi|1348917|gb|G26685|G26685"]))
+413
+>>> record_dict.close()
+
+Many but not all of the supported input file formats can be indexed like
+this. For example "fasta", "fastq", "qual" and even the binary format "sff"
+work, but alignment formats like "phylip", "clustal" and "nexus" will not.
+
+In most cases you can also use SeqIO.index to get the record from the file
+as a raw string (not a SeqRecord). This can be useful for example to extract
+a sub-set of records from a file where SeqIO cannot output the file format
+(e.g. the plain text SwissProt format, "swiss") or where it is important to
+keep the output 100% identical to the input. For example,
+
+>>> from Bio import SeqIO
+>>> record_dict = SeqIO.index("Fasta/f002", "fasta")
+>>> len(record_dict)
+3
+>>> print(record_dict.get_raw("gi|1348917|gb|G26685|G26685").decode())
+>gi|1348917|gb|G26685|G26685 human STS STS_D11734.
+CGGAGCCAGCGAGCATATGCTGCATGAGGACCTTTCTATCTTACATTATGGCTGGGAATCTTACTCTTTC
+ATCTGATACCTTGTTCAGATTTCAAAATAGTTGTAGCCTTATCCTGGTTTTACAGATGTGAAACTTTCAA
+GAGATTTACTGACTTTCCTAGAATAGTTTCTCTACTGGAAACCTGATGCTTTTATAAGCCATTGTGATTA
+GGATGACTGTTACAGGCTTAGCTTTGTGTGAAANCCAGTCACCTTTCTCCTAGGTAATGAGTAGTGCTGT
+TCATATTACTNTAAGTTCTATAGCATACTTGCNATCCTTTANCCATGCTTATCATANGTACCATTTGAGG
+AATTGNTTTGCCCTTTTGGGTTTNTTNTTGGTAAANNNTTCCCGGGTGGGGGNGGTNNNGAAA
+
+>>> print(record_dict["gi|1348917|gb|G26685|G26685"].format("fasta"))
+>gi|1348917|gb|G26685|G26685 human STS STS_D11734.
+CGGAGCCAGCGAGCATATGCTGCATGAGGACCTTTCTATCTTACATTATGGCTGGGAATC
+TTACTCTTTCATCTGATACCTTGTTCAGATTTCAAAATAGTTGTAGCCTTATCCTGGTTT
+TACAGATGTGAAACTTTCAAGAGATTTACTGACTTTCCTAGAATAGTTTCTCTACTGGAA
+ACCTGATGCTTTTATAAGCCATTGTGATTAGGATGACTGTTACAGGCTTAGCTTTGTGTG
+AAANCCAGTCACCTTTCTCCTAGGTAATGAGTAGTGCTGTTCATATTACTNTAAGTTCTA
+TAGCATACTTGCNATCCTTTANCCATGCTTATCATANGTACCATTTGAGGAATTGNTTTG
+CCCTTTTGGGTTTNTTNTTGGTAAANNNTTCCCGGGTGGGGGNGGTNNNGAAA
+
+>>> record_dict.close()
+
+Here the original file and what Biopython would output differ in the line
+wrapping. Also note that the get_raw method will return a bytes object,
+hence the use of decode to turn it into a string.
+
+Also note that the get_raw method will preserve the newline endings. This
+example FASTQ file uses Unix style endings (b"\n" only),
+
+>>> from Bio import SeqIO
+>>> fastq_dict = SeqIO.index("Quality/example.fastq", "fastq")
+>>> len(fastq_dict)
+3
+>>> raw = fastq_dict.get_raw("EAS54_6_R1_2_1_540_792")
+>>> raw.count(b"\n")
+4
+>>> raw.count(b"\r\n")
+0
+>>> b"\r" in raw
+False
+>>> len(raw)
+78
+>>> fastq_dict.close()
+
+Here is the same file but using DOS/Windows new lines (b"\r\n" instead),
+
+>>> from Bio import SeqIO
+>>> fastq_dict = SeqIO.index("Quality/example_dos.fastq", "fastq")
+>>> len(fastq_dict)
+3
+>>> raw = fastq_dict.get_raw("EAS54_6_R1_2_1_540_792")
+>>> raw.count(b"\n")
+4
+>>> raw.count(b"\r\n")
+4
+>>> b"\r\n" in raw
+True
+>>> len(raw)
+82
+>>> fastq_dict.close()
+
+Because this uses two bytes for each new line, the file is longer than
+the Unix equivalent with only one byte.
+
+
+Input - Alignments
+------------------
+You can read in alignment files as alignment objects using Bio.AlignIO.
+Alternatively, reading in an alignment file format via Bio.SeqIO will give
+you a SeqRecord for each row of each alignment:
+
+>>> from Bio import SeqIO
+>>> for record in SeqIO.parse("Clustalw/hedgehog.aln", "clustal"):
+...     print("%s %i" % (record.id, len(record)))
+gi|167877390|gb|EDS40773.1| 447
+gi|167234445|ref|NP_001107837. 447
+gi|74100009|gb|AAZ99217.1| 447
+gi|13990994|dbj|BAA33523.2| 447
+gi|56122354|gb|AAV74328.1| 447
+
+
+Output
+------
+Use the function Bio.SeqIO.write(...), which takes a complete set of
+SeqRecord objects (either as a list, or an iterator), an output file handle
+(or in recent versions of Biopython an output filename as a string) and of
+course the file format::
+
+  from Bio import SeqIO
+  records = ...
+  SeqIO.write(records, "example.faa", "fasta")
+
+Or, using a handle::
+
+    from Bio import SeqIO
+    records = ...
+    with open("example.faa", "w") as handle:
+      SeqIO.write(records, handle, "fasta")
+
+You are expected to call this function once (with all your records) and if
+using a handle, make sure you close it to flush the data to the hard disk.
+
+
+Output - Advanced
+-----------------
+The effect of calling write() multiple times on a single file will vary
+depending on the file format, and is best avoided unless you have a strong
+reason to do so.
+
+If you give a filename, then each time you call write() the existing file
+will be overwritten. For sequential file formats (e.g. fasta, genbank) each
+"record block" holds a single sequence.  For these files it would probably
+be safe to call write() multiple times by re-using the same handle.
+
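+For example, a sketch re-using one handle for two FASTA batches (the
+filenames here are illustrative)::
+
+    from Bio import SeqIO
+    with open("combined.fasta", "w") as handle:
+        SeqIO.write(SeqIO.parse("part1.fasta", "fasta"), handle, "fasta")
+        SeqIO.write(SeqIO.parse("part2.fasta", "fasta"), handle, "fasta")
+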
+However, trying this for certain alignment formats (e.g. phylip, clustal,
+stockholm) would have the effect of concatenating several multiple sequence
+alignments together.  Such files are created by the PHYLIP suite of programs
+for bootstrap analysis, but it is clearer to do this via Bio.AlignIO instead.
+
+Worse, many file formats have an explicit header and/or footer structure
+(e.g. any XML format, and most binary file formats like SFF). Here making
+multiple calls to write() will result in an invalid file.
+
+
+Conversion
+----------
+The Bio.SeqIO.convert(...) function provides an easy interface for simple
+file format conversions. Additionally, it may use file format specific
+optimisations, so this should also be the fastest approach.
+
+In general however, you can combine the Bio.SeqIO.parse(...) function with
+the Bio.SeqIO.write(...) function for sequence file conversion. Using
+generator expressions or generator functions provides a memory efficient way
+to perform filtering or other extra operations as part of the process.
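+
+For example, a minimal sketch dropping short records during conversion
+(the filenames and length cutoff here are illustrative)::
+
+    from Bio import SeqIO
+    records = (rec for rec in SeqIO.parse("input.fastq", "fastq")
+               if len(rec) >= 50)
+    count = SeqIO.write(records, "filtered.fasta", "fasta")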
+
+
+File Formats
+------------
+When specifying the file format, use lowercase strings.  The same format
+names are also used in Bio.AlignIO and include the following:
+
+    - abi     - Applied Biosystem's sequencing trace format
+    - abi-trim - Same as "abi" but with quality trimming with Mott's algorithm
+    - ace     - Reads the contig sequences from an ACE assembly file.
+    - cif-atom - Uses Bio.PDB.MMCIFParser to determine the (partial) protein
+      sequence as it appears in the structure based on the atomic coordinates.
+    - cif-seqres - Reads a macromolecular Crystallographic Information File
+      (mmCIF) file to determine the complete protein sequence as defined by the
+      _pdbx_poly_seq_scheme records.
+    - embl    - The EMBL flat file format. Uses Bio.GenBank internally.
+    - fasta   - The generic sequence file format where each record starts with
+      an identifier line starting with a ">" character, followed by
+      lines of sequence.
+    - fasta-2line - Stricter interpretation of the FASTA format using exactly
+      two lines per record (no line wrapping).
+    - fastq   - A "FASTA like" format used by Sanger which also stores PHRED
+      sequence quality values (with an ASCII offset of 33).
+    - fastq-sanger - An alias for "fastq" for consistency with BioPerl and EMBOSS
+    - fastq-solexa - Original Solexa/Illumina variant of the FASTQ format which
+      encodes Solexa quality scores (not PHRED quality scores) with an
+      ASCII offset of 64.
+    - fastq-illumina - Solexa/Illumina 1.3 to 1.7 variant of the FASTQ format
+      which encodes PHRED quality scores with an ASCII offset of 64
+      (not 33). Note as of version 1.8 of the CASAVA pipeline Illumina
+      will produce FASTQ files using the standard Sanger encoding.
+    - gck     - Gene Construction Kit's format.
+    - genbank - The GenBank or GenPept flat file format.
+    - gb      - An alias for "genbank", for consistency with NCBI Entrez Utilities
+    - ig      - The IntelliGenetics file format, apparently the same as the
+      MASE alignment format.
+    - imgt    - An EMBL like format from IMGT where the feature tables are more
+      indented to allow for longer feature types.
+    - nib     - UCSC's nib file format for nucleotide sequences, which uses one
+      nibble (4 bits) to represent each nucleotide, and stores two nucleotides in
+      one byte.
+    - pdb-seqres -  Reads a Protein Data Bank (PDB) file to determine the
+      complete protein sequence as it appears in the header (no dependencies).
+    - pdb-atom - Uses Bio.PDB to determine the (partial) protein sequence as
+      it appears in the structure based on the atom coordinate section of the
+      file (requires NumPy for Bio.PDB).
+    - phd     - Output from PHRED, used by PHRAP and CONSED for input.
+    - pir     - A "FASTA like" format introduced by the National Biomedical
+      Research Foundation (NBRF) for the Protein Information Resource
+      (PIR) database, now part of UniProt.
+    - seqxml  - SeqXML, simple XML format described in Schmitt et al (2011).
+    - sff     - Standard Flowgram Format (SFF), typical output from Roche 454.
+    - sff-trim - Standard Flowgram Format (SFF) with given trimming applied.
+    - snapgene - SnapGene's native format.
+    - swiss   - Plain text Swiss-Prot aka UniProt format.
+    - tab     - Simple two column tab separated sequence files, where each
+      line holds a record's identifier and sequence. For example,
+      this is used by Agilent's eArray software when saving
+      microarray probes in a minimal tab delimited text file.
+    - qual    - A "FASTA like" format holding PHRED quality values from
+      sequencing DNA, but no actual sequences (usually provided
+      in separate FASTA files).
+    - uniprot-xml - The UniProt XML format (replacement for the SwissProt plain
+      text format which we call "swiss")
+    - xdna        - DNA Strider's and SerialCloner's native format.
+
+Note that while Bio.SeqIO can read all the above file formats, it cannot
+write to all of them.
+
+You can also use any file format supported by Bio.AlignIO, such as "nexus",
+"phylip" and "stockholm", which gives you access to the individual sequences
+making up each alignment as SeqRecords.
+"""
+# TODO
+# - define policy on reading aligned sequences with more than
+#   one gap character (see also AlignIO)
+#
+# - How best to handle unique/non unique record.id when writing.
+#   For most file formats reading such files is fine; The stockholm
+#   parser would fail.
+#
+# - MSF multiple alignment format, aka GCG, aka PileUp format (*.msf)
+#   http://www.bioperl.org/wiki/MSF_multiple_alignment_format
+#
+# FAO BioPython Developers
+# ------------------------
+# The way I envision this SeqIO system working is that for any sequence file
+# format we have an iterator that returns SeqRecord objects.
+#
+# This also applies to interlaced file formats (like clustal - although that
+# is now handled via Bio.AlignIO instead) where the file cannot be read record
+# by record.  You should still return an iterator, even if the implementation
+# could just as easily return a list.
+#
+# These file format specific sequence iterators may be implemented as:
+#    - Classes which take a handle for __init__ and provide the __iter__ method
+#    - Functions that take a handle, and return an iterator object
+#    - Generator functions that take a handle, and yield SeqRecord objects
+#
+# It is then trivial to turn this iterator into a list of SeqRecord objects,
+# an in memory dictionary, or a multiple sequence alignment object.
+#
+# For building the dictionary by default the id property of each SeqRecord is
+# used as the key.  You should always populate the id property, and it should
+# be unique in most cases. For some file formats the accession number is a good
+# choice.  If the file itself contains ambiguous identifiers, don't try to
+# disambiguate them - return them as is.
+#
+# When adding a new file format, please use the same lower case format name
+# as BioPerl, or if they have not defined one, try the names used by EMBOSS.
+#
+# See also http://biopython.org/wiki/SeqIO_dev
+#
+# --Peter
+from Bio.Align import MultipleSeqAlignment
+from Bio.File import as_handle
+from Bio.SeqIO import AbiIO
+from Bio.SeqIO import AceIO
+from Bio.SeqIO import FastaIO
+from Bio.SeqIO import GckIO
+from Bio.SeqIO import IgIO  # IntelliGenetics or MASE format
+from Bio.SeqIO import InsdcIO  # EMBL and GenBank
+from Bio.SeqIO import NibIO
+from Bio.SeqIO import PdbIO
+from Bio.SeqIO import PhdIO
+from Bio.SeqIO import PirIO
+from Bio.SeqIO import QualityIO  # FastQ and qual files
+from Bio.SeqIO import SeqXmlIO
+from Bio.SeqIO import SffIO
+from Bio.SeqIO import SnapGeneIO
+from Bio.SeqIO import SwissIO
+from Bio.SeqIO import TabIO
+from Bio.SeqIO import TwoBitIO
+from Bio.SeqIO import UniprotIO
+from Bio.SeqIO import XdnaIO
+from Bio.SeqRecord import SeqRecord
+
+# Convention for format names is "mainname-subtype" in lower case.
+# Please use the same names as BioPerl or EMBOSS where possible.
+#
+# Note that this simple system copes with defining
+# multiple possible iterators for a given format/extension
+# with the -subtype suffix
+#
+# Most alignment file formats will be handled via Bio.AlignIO
+
+_FormatToIterator = {
+    "abi": AbiIO.AbiIterator,
+    "abi-trim": AbiIO._AbiTrimIterator,
+    "ace": AceIO.AceIterator,
+    "fasta": FastaIO.FastaIterator,
+    "fasta-2line": FastaIO.FastaTwoLineIterator,
+    "ig": IgIO.IgIterator,
+    "embl": InsdcIO.EmblIterator,
+    "embl-cds": InsdcIO.EmblCdsFeatureIterator,
+    "gb": InsdcIO.GenBankIterator,
+    "gck": GckIO.GckIterator,
+    "genbank": InsdcIO.GenBankIterator,
+    "genbank-cds": InsdcIO.GenBankCdsFeatureIterator,
+    "imgt": InsdcIO.ImgtIterator,
+    "nib": NibIO.NibIterator,
+    "cif-seqres": PdbIO.CifSeqresIterator,
+    "cif-atom": PdbIO.CifAtomIterator,
+    "pdb-atom": PdbIO.PdbAtomIterator,
+    "pdb-seqres": PdbIO.PdbSeqresIterator,
+    "phd": PhdIO.PhdIterator,
+    "pir": PirIO.PirIterator,
+    "fastq": QualityIO.FastqPhredIterator,
+    "fastq-sanger": QualityIO.FastqPhredIterator,
+    "fastq-solexa": QualityIO.FastqSolexaIterator,
+    "fastq-illumina": QualityIO.FastqIlluminaIterator,
+    "qual": QualityIO.QualPhredIterator,
+    "seqxml": SeqXmlIO.SeqXmlIterator,
+    "sff": SffIO.SffIterator,
+    "snapgene": SnapGeneIO.SnapGeneIterator,
+    "sff-trim": SffIO._SffTrimIterator,  # Not sure about this in the long run
+    "swiss": SwissIO.SwissIterator,
+    "tab": TabIO.TabIterator,
+    "twobit": TwoBitIO.TwoBitIterator,
+    "uniprot-xml": UniprotIO.UniprotIterator,
+    "xdna": XdnaIO.XdnaIterator,
+}
+
+_FormatToString = {
+    "fasta": FastaIO.as_fasta,
+    "fasta-2line": FastaIO.as_fasta_2line,
+    "tab": TabIO.as_tab,
+    "fastq": QualityIO.as_fastq,
+    "fastq-sanger": QualityIO.as_fastq,
+    "fastq-solexa": QualityIO.as_fastq_solexa,
+    "fastq-illumina": QualityIO.as_fastq_illumina,
+    "qual": QualityIO.as_qual,
+}
+
+# This could exclude file formats covered by _FormatToString?
+# Right now used in the unit tests as proxy for all supported outputs...
+_FormatToWriter = {
+    "fasta": FastaIO.FastaWriter,
+    "fasta-2line": FastaIO.FastaTwoLineWriter,
+    "gb": InsdcIO.GenBankWriter,
+    "genbank": InsdcIO.GenBankWriter,
+    "embl": InsdcIO.EmblWriter,
+    "imgt": InsdcIO.ImgtWriter,
+    "nib": NibIO.NibWriter,
+    "phd": PhdIO.PhdWriter,
+    "pir": PirIO.PirWriter,
+    "fastq": QualityIO.FastqPhredWriter,
+    "fastq-sanger": QualityIO.FastqPhredWriter,
+    "fastq-solexa": QualityIO.FastqSolexaWriter,
+    "fastq-illumina": QualityIO.FastqIlluminaWriter,
+    "qual": QualityIO.QualPhredWriter,
+    "seqxml": SeqXmlIO.SeqXmlWriter,
+    "sff": SffIO.SffWriter,
+    "tab": TabIO.TabWriter,
+    "xdna": XdnaIO.XdnaWriter,
+}
+
+
+def write(sequences, handle, format):
+    """Write complete set of sequences to a file.
+
+    Arguments:
+     - sequences - A list (or iterator) of SeqRecord objects, or a single
+       SeqRecord.
+     - handle    - File handle object to write to, or filename as string.
+     - format    - lower case string describing the file format to write.
+
+    Note if providing a file handle, your code should close the handle
+    after calling this function (to ensure the data gets flushed to disk).
+
+    Returns the number of records written (as an integer).
+    """
+    from Bio import AlignIO
+
+    # Try and give helpful error messages:
+    if not isinstance(format, str):
+        raise TypeError("Need a string for the file format (lower case)")
+    if not format:
+        raise ValueError("Format required (lower case string)")
+    if not format.islower():
+        raise ValueError("Format string '%s' should be lower case" % format)
+
+    if isinstance(handle, SeqRecord):
+        raise TypeError("Check arguments, handle should NOT be a SeqRecord")
+    if isinstance(handle, list):
+        # e.g. list of SeqRecord objects
+        raise TypeError("Check arguments, handle should NOT be a list")
+
+    if isinstance(sequences, SeqRecord):
+        # This raised an exception in older versions of Biopython
+        sequences = [sequences]
+
+    # Map the file format to a writer function/class
+    format_function = _FormatToString.get(format)
+    if format_function is not None:
+        count = 0
+        with as_handle(handle, "w") as fp:
+            for record in sequences:
+                fp.write(format_function(record))
+                count += 1
+        return count
+
+    writer_class = _FormatToWriter.get(format)
+    if writer_class is not None:
+        count = writer_class(handle).write_file(sequences)
+        if not isinstance(count, int):
+            raise RuntimeError(
+                "Internal error - the underlying %s writer "
+                "should have returned the record count, not %r" % (format, count)
+            )
+        return count
+
+    if format in AlignIO._FormatToWriter:
+        # Try and turn all the records into a single alignment,
+        # and write that using Bio.AlignIO
+        alignment = MultipleSeqAlignment(sequences)
+        alignment_count = AlignIO.write([alignment], handle, format)
+        if alignment_count != 1:
+            raise RuntimeError(
+                "Internal error - the underlying writer "
+                "should have returned 1, not %r" % alignment_count
+            )
+        count = len(alignment)
+        return count
+
+    if format in _FormatToIterator or format in AlignIO._FormatToIterator:
+        raise ValueError("Reading format '%s' is supported, but not writing" % format)
+
+    raise ValueError("Unknown format '%s'" % format)
+
+
+def parse(handle, format, alphabet=None):
+    r"""Turn a sequence file into an iterator returning SeqRecords.
+
+    Arguments:
+     - handle   - handle to the file, or the filename as a string
+       (note older versions of Biopython only took a handle).
+     - format   - lower case string describing the file format.
+     - alphabet - no longer used, should be None.
+
+    Typical usage, opening a file to read in, and looping over the record(s):
+
+    >>> from Bio import SeqIO
+    >>> filename = "Fasta/sweetpea.nu"
+    >>> for record in SeqIO.parse(filename, "fasta"):
+    ...    print("ID %s" % record.id)
+    ...    print("Sequence length %i" % len(record))
+    ID gi|3176602|gb|U78617.1|LOU78617
+    Sequence length 309
+
+    For lazy-loading file formats such as twobit, where the file contents
+    are read on demand only, ensure that the file remains open while extracting
+    sequence data.
+
+    If you have a string 'data' containing the file contents, you must
+    first turn this into a handle in order to parse it:
+
+    >>> data = ">Alpha\nACCGGATGTA\n>Beta\nAGGCTCGGTTA\n"
+    >>> from Bio import SeqIO
+    >>> from io import StringIO
+    >>> for record in SeqIO.parse(StringIO(data), "fasta"):
+    ...     print("%s %s" % (record.id, record.seq))
+    Alpha ACCGGATGTA
+    Beta AGGCTCGGTTA
+
+    Use the Bio.SeqIO.read(...) function when you expect a single record
+    only.
+    """
+    # NOTE - The above docstring has some raw \n characters needed
+    # for the StringIO example, hence the whole docstring is in raw
+    # string mode (see the leading r before the opening quote).
+    from Bio import AlignIO
+
+    # Try and give helpful error messages:
+    if not isinstance(format, str):
+        raise TypeError("Need a string for the file format (lower case)")
+    if not format:
+        raise ValueError("Format required (lower case string)")
+    if not format.islower():
+        raise ValueError("Format string '%s' should be lower case" % format)
+    if alphabet is not None:
+        raise ValueError("The alphabet argument is no longer supported")
+
+    iterator_generator = _FormatToIterator.get(format)
+    if iterator_generator:
+        return iterator_generator(handle)
+    if format in AlignIO._FormatToIterator:
+        # Use Bio.AlignIO to read in the alignments
+        return (r for alignment in AlignIO.parse(handle, format) for r in alignment)
+    raise ValueError("Unknown format '%s'" % format)
+
+
+def read(handle, format, alphabet=None):
+    """Turn a sequence file into a single SeqRecord.
+
+    Arguments:
+     - handle   - handle to the file, or the filename as a string
+       (note older versions of Biopython only took a handle).
+     - format   - string describing the file format.
+     - alphabet - no longer used, should be None.
+
+    This function is for use parsing sequence files containing
+    exactly one record.  For example, reading a GenBank file:
+
+    >>> from Bio import SeqIO
+    >>> record = SeqIO.read("GenBank/arab1.gb", "genbank")
+    >>> print("ID %s" % record.id)
+    ID AC007323.5
+    >>> print("Sequence length %i" % len(record))
+    Sequence length 86436
+
+    If the handle contains no records, or more than one record,
+    an exception is raised.  For example:
+
+    >>> from Bio import SeqIO
+    >>> record = SeqIO.read("GenBank/cor6_6.gb", "genbank")
+    Traceback (most recent call last):
+        ...
+    ValueError: More than one record found in handle
+
+    If however you want the first record from a file containing
+    multiple records this function would raise an exception (as
+    shown in the example above).  Instead use:
+
+    >>> from Bio import SeqIO
+    >>> record = next(SeqIO.parse("GenBank/cor6_6.gb", "genbank"))
+    >>> print("First record's ID %s" % record.id)
+    First record's ID X55053.1
+
+    Use the Bio.SeqIO.parse(handle, format) function if you want
+    to read multiple records from the handle.
+    """
+    iterator = parse(handle, format, alphabet)
+    try:
+        record = next(iterator)
+    except StopIteration:
+        raise ValueError("No records found in handle") from None
+    try:
+        next(iterator)
+        raise ValueError("More than one record found in handle")
+    except StopIteration:
+        pass
+    return record
+
+
+def to_dict(sequences, key_function=None):
+    """Turn a sequence iterator or list into a dictionary.
+
+    Arguments:
+     - sequences  - An iterator that returns SeqRecord objects,
+       or simply a list of SeqRecord objects.
+     - key_function - Optional callback function which when given a
+       SeqRecord should return a unique key for the dictionary.
+
+    e.g. key_function = lambda rec : rec.name
+    or,  key_function = lambda rec : rec.description.split()[0]
+
+    If key_function is omitted then record.id is used, on the assumption
+    that the records objects returned are SeqRecords with a unique id.
+
+    If there are duplicate keys, an error is raised.
+
+    Since Python 3.7, the default dict class maintains key order, meaning
+    this dictionary will reflect the order of records given to it. For
+    CPython and PyPy, this was already implemented for Python 3.6, so
+    effectively you can always assume the record order is preserved.
+
+    Example usage, defaulting to using the record.id as key:
+
+    >>> from Bio import SeqIO
+    >>> filename = "GenBank/cor6_6.gb"
+    >>> format = "genbank"
+    >>> id_dict = SeqIO.to_dict(SeqIO.parse(filename, format))
+    >>> print(list(id_dict))
+    ['X55053.1', 'X62281.1', 'M81224.1', 'AJ237582.1', 'L31939.1', 'AF297471.1']
+    >>> print(id_dict["L31939.1"].description)
+    Brassica rapa (clone bif72) kin mRNA, complete cds
+
+    A more complex example, using the key_function argument in order to
+    use a sequence checksum as the dictionary key:
+
+    >>> from Bio import SeqIO
+    >>> from Bio.SeqUtils.CheckSum import seguid
+    >>> filename = "GenBank/cor6_6.gb"
+    >>> format = "genbank"
+    >>> seguid_dict = SeqIO.to_dict(SeqIO.parse(filename, format),
+    ...               key_function = lambda rec : seguid(rec.seq))
+    >>> for key, record in sorted(seguid_dict.items()):
+    ...     print("%s %s" % (key, record.id))
+    /wQvmrl87QWcm9llO4/efg23Vgg AJ237582.1
+    BUg6YxXSKWEcFFH0L08JzaLGhQs L31939.1
+    SabZaA4V2eLE9/2Fm5FnyYy07J4 X55053.1
+    TtWsXo45S3ZclIBy4X/WJc39+CY M81224.1
+    l7gjJFE6W/S1jJn5+1ASrUKW/FA X62281.1
+    uVEYeAQSV5EDQOnFoeMmVea+Oow AF297471.1
+
+    This approach is not suitable for very large sets of sequences, as all
+    the SeqRecord objects are held in memory. Instead, consider using the
+    Bio.SeqIO.index() function (if it supports your particular file format).
+    """
+    # This is to avoid a lambda function:
+
+    def _default_key_function(rec):
+        return rec.id
+
+    if key_function is None:
+        key_function = _default_key_function
+
+    d = {}
+    for record in sequences:
+        key = key_function(record)
+        if key in d:
+            raise ValueError("Duplicate key '%s'" % key)
+        d[key] = record
+    return d
+
+
+def index(filename, format, alphabet=None, key_function=None):
+    """Indexes a sequence file and returns a dictionary like object.
+
+    Arguments:
+     - filename - string giving name of file to be indexed
+     - format   - lower case string describing the file format
+     - alphabet - no longer used, leave as None
+     - key_function - Optional callback function which when given a
+       SeqRecord identifier string should return a unique key for the
+       dictionary.
+
+    This indexing function will return a dictionary like object, giving the
+    SeqRecord objects as values.
+
+    As of Biopython 1.69, this will preserve the ordering of the records in
+    the file when iterating over the entries.
+
+    >>> from Bio import SeqIO
+    >>> records = SeqIO.index("Quality/example.fastq", "fastq")
+    >>> len(records)
+    3
+    >>> list(records)  # make a list of the keys
+    ['EAS54_6_R1_2_1_413_324', 'EAS54_6_R1_2_1_540_792', 'EAS54_6_R1_2_1_443_348']
+    >>> print(records["EAS54_6_R1_2_1_540_792"].format("fasta"))
+    >EAS54_6_R1_2_1_540_792
+    TTGGCAGGCCAAGGCCGATGGATCA
+    
+    >>> "EAS54_6_R1_2_1_540_792" in records
+    True
+    >>> print(records.get("Missing", None))
+    None
+    >>> records.close()
+
+    If the file is BGZF compressed, this is detected automatically. Ordinary
+    GZIP files are not supported:
+
+    >>> from Bio import SeqIO
+    >>> records = SeqIO.index("Quality/example.fastq.bgz", "fastq")
+    >>> len(records)
+    3
+    >>> print(records["EAS54_6_R1_2_1_540_792"].seq)
+    TTGGCAGGCCAAGGCCGATGGATCA
+    >>> records.close()
+
+    When you call the index function, it will scan through the file, noting
+    the location of each record. When you access a particular record via the
+    dictionary methods, the code will jump to the appropriate part of the
+    file and then parse that section into a SeqRecord.
+
+    Note that not all the input formats supported by Bio.SeqIO can be used
+    with this index function. It is designed to work only with sequential
+    file formats (e.g. "fasta", "gb", "fastq") and is not suitable for any
+    interlaced file format (e.g. alignment formats such as "clustal").
+
+    For small files, it may be more efficient to use an in memory Python
+    dictionary, e.g.
+
+    >>> from Bio import SeqIO
+    >>> records = SeqIO.to_dict(SeqIO.parse("Quality/example.fastq", "fastq"))
+    >>> len(records)
+    3
+    >>> list(records)  # make a list of the keys
+    ['EAS54_6_R1_2_1_413_324', 'EAS54_6_R1_2_1_540_792', 'EAS54_6_R1_2_1_443_348']
+    >>> print(records["EAS54_6_R1_2_1_540_792"].format("fasta"))
+    >EAS54_6_R1_2_1_540_792
+    TTGGCAGGCCAAGGCCGATGGATCA
+    
+
+    As with the to_dict() function, by default the id string of each record
+    is used as the key. You can specify a callback function to transform
+    this (the record identifier string) into your preferred key. For example:
+
+    >>> from Bio import SeqIO
+    >>> def make_tuple(identifier):
+    ...     parts = identifier.split("_")
+    ...     return int(parts[-2]), int(parts[-1])
+    >>> records = SeqIO.index("Quality/example.fastq", "fastq",
+    ...                       key_function=make_tuple)
+    >>> len(records)
+    3
+    >>> list(records)  # make a list of the keys
+    [(413, 324), (540, 792), (443, 348)]
+    >>> print(records[(540, 792)].format("fasta"))
+    >EAS54_6_R1_2_1_540_792
+    TTGGCAGGCCAAGGCCGATGGATCA
+    
+    >>> (540, 792) in records
+    True
+    >>> "EAS54_6_R1_2_1_540_792" in records
+    False
+    >>> print(records.get("Missing", None))
+    None
+    >>> records.close()
+
+    Another common use case would be indexing an NCBI style FASTA file,
+    where you might want to extract the GI number from the FASTA identifier
+    to use as the dictionary key.
+
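+    A minimal sketch of such a key function (assuming identifiers of the
+    form "gi|12345|..."; real NCBI identifiers vary):
+
+    >>> def get_gi(identifier):
+    ...     return identifier.split("|")[1]
+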
+    Notice that unlike the to_dict() function, here the key_function does
+    not get given the full SeqRecord to use to generate the key. Doing so
+    would impose a severe performance penalty as it would require the file
+    to be completely parsed while building the index; the implementation
+    therefore avoids full parsing wherever possible.
+
+    See Also: Bio.SeqIO.index_db() and Bio.SeqIO.to_dict()
+
+    """
+    # Try and give helpful error messages:
+    if not isinstance(filename, str):
+        raise TypeError("Need a filename (not a handle)")
+    if not isinstance(format, str):
+        raise TypeError("Need a string for the file format (lower case)")
+    if not format:
+        raise ValueError("Format required (lower case string)")
+    if not format.islower():
+        raise ValueError("Format string '%s' should be lower case" % format)
+    if alphabet is not None:
+        raise ValueError("The alphabet argument is no longer supported")
+
+    # Map the file format to a sequence iterator:
+    from ._index import _FormatToRandomAccess  # Lazy import
+    from Bio.File import _IndexedSeqFileDict
+
+    try:
+        proxy_class = _FormatToRandomAccess[format]
+    except KeyError:
+        raise ValueError("Unsupported format %r" % format) from None
+    repr = "SeqIO.index(%r, %r, alphabet=%r, key_function=%r)" % (
+        filename,
+        format,
+        alphabet,
+        key_function,
+    )
+    return _IndexedSeqFileDict(
+        proxy_class(filename, format), key_function, repr, "SeqRecord"
+    )
+
+
+def index_db(
+    index_filename, filenames=None, format=None, alphabet=None, key_function=None
+):
+    """Index several sequence files and return a dictionary like object.
+
+    The index is stored in an SQLite database rather than in memory (as in the
+    Bio.SeqIO.index(...) function).
+
+    Arguments:
+     - index_filename - Where to store the SQLite index
+     - filenames - list of strings specifying file(s) to be indexed, or when
+       indexing a single file this can be given as a string.
+       (optional if reloading an existing index, but must match)
+     - format   - lower case string describing the file format
+       (optional if reloading an existing index, but must match)
+     - alphabet - no longer used, leave as None.
+     - key_function - Optional callback function which when given a
+       SeqRecord identifier string should return a unique
+       key for the dictionary.
+
+    This indexing function will return a dictionary like object, giving the
+    SeqRecord objects as values:
+
+    >>> from Bio import SeqIO
+    >>> files = ["GenBank/NC_000932.faa", "GenBank/NC_005816.faa"]
+    >>> def get_gi(name):
+    ...     parts = name.split("|")
+    ...     i = parts.index("gi")
+    ...     assert i != -1
+    ...     return parts[i+1]
+    >>> idx_name = ":memory:" #use an in memory SQLite DB for this test
+    >>> records = SeqIO.index_db(idx_name, files, "fasta", key_function=get_gi)
+    >>> len(records)
+    95
+    >>> records["7525076"].description
+    'gi|7525076|ref|NP_051101.1| Ycf2 [Arabidopsis thaliana]'
+    >>> records["45478717"].description
+    'gi|45478717|ref|NP_995572.1| pesticin [Yersinia pestis biovar Microtus str. 91001]'
+    >>> records.close()
+
+    In this example the two files contain 85 and 10 records respectively.
+
+    BGZF compressed files are supported, and detected automatically. Ordinary
+    GZIP compressed files are not supported.
+
+    See Also: Bio.SeqIO.index() and Bio.SeqIO.to_dict(), and the Python module
+    glob which is useful for building lists of files.
+
+    """
+    # Try and give helpful error messages:
+    if not isinstance(index_filename, str):
+        raise TypeError("Need a string for the index filename")
+    if isinstance(filenames, str):
+        # Make the API a little more friendly, and more similar
+        # to Bio.SeqIO.index(...) for indexing just one file.
+        filenames = [filenames]
+    if filenames is not None and not isinstance(filenames, list):
+        raise TypeError("Need a list of filenames (as strings), or one filename")
+    if format is not None and not isinstance(format, str):
+        raise TypeError("Need a string for the file format (lower case)")
+    if format and not format.islower():
+        raise ValueError("Format string '%s' should be lower case" % format)
+    if alphabet is not None:
+        raise ValueError("The alphabet argument is no longer supported")
+
+    # Map the file format to a sequence iterator:
+    from ._index import _FormatToRandomAccess  # Lazy import
+    from Bio.File import _SQLiteManySeqFilesDict
+
+    repr = "SeqIO.index_db(%r, filenames=%r, format=%r, key_function=%r)" % (
+        index_filename,
+        filenames,
+        format,
+        key_function,
+    )
+
+    def proxy_factory(format, filename=None):
+        """Given a filename returns proxy object, else boolean if format OK."""
+        if filename:
+            return _FormatToRandomAccess[format](filename, format)
+        else:
+            return format in _FormatToRandomAccess
+
+    return _SQLiteManySeqFilesDict(
+        index_filename, filenames, proxy_factory, format, key_function, repr
+    )
+
+
+# TODO? - Handling aliases explicitly would let us shorten this list:
+_converter = {
+    ("genbank", "fasta"): InsdcIO._genbank_convert_fasta,
+    ("gb", "fasta"): InsdcIO._genbank_convert_fasta,
+    ("embl", "fasta"): InsdcIO._embl_convert_fasta,
+    ("fastq", "fasta"): QualityIO._fastq_convert_fasta,
+    ("fastq-sanger", "fasta"): QualityIO._fastq_convert_fasta,
+    ("fastq-solexa", "fasta"): QualityIO._fastq_convert_fasta,
+    ("fastq-illumina", "fasta"): QualityIO._fastq_convert_fasta,
+    ("fastq", "tab"): QualityIO._fastq_convert_tab,
+    ("fastq-sanger", "tab"): QualityIO._fastq_convert_tab,
+    ("fastq-solexa", "tab"): QualityIO._fastq_convert_tab,
+    ("fastq-illumina", "tab"): QualityIO._fastq_convert_tab,
+    ("fastq", "fastq"): QualityIO._fastq_sanger_convert_fastq_sanger,
+    ("fastq-sanger", "fastq"): QualityIO._fastq_sanger_convert_fastq_sanger,
+    ("fastq-solexa", "fastq"): QualityIO._fastq_solexa_convert_fastq_sanger,
+    ("fastq-illumina", "fastq"): QualityIO._fastq_illumina_convert_fastq_sanger,
+    ("fastq", "fastq-sanger"): QualityIO._fastq_sanger_convert_fastq_sanger,
+    ("fastq-sanger", "fastq-sanger"): QualityIO._fastq_sanger_convert_fastq_sanger,
+    ("fastq-solexa", "fastq-sanger"): QualityIO._fastq_solexa_convert_fastq_sanger,
+    ("fastq-illumina", "fastq-sanger"): QualityIO._fastq_illumina_convert_fastq_sanger,
+    ("fastq", "fastq-solexa"): QualityIO._fastq_sanger_convert_fastq_solexa,
+    ("fastq-sanger", "fastq-solexa"): QualityIO._fastq_sanger_convert_fastq_solexa,
+    ("fastq-solexa", "fastq-solexa"): QualityIO._fastq_solexa_convert_fastq_solexa,
+    ("fastq-illumina", "fastq-solexa"): QualityIO._fastq_illumina_convert_fastq_solexa,
+    ("fastq", "fastq-illumina"): QualityIO._fastq_sanger_convert_fastq_illumina,
+    ("fastq-sanger", "fastq-illumina"): QualityIO._fastq_sanger_convert_fastq_illumina,
+    ("fastq-solexa", "fastq-illumina"): QualityIO._fastq_solexa_convert_fastq_illumina,
+    (
+        "fastq-illumina",
+        "fastq-illumina",
+    ): QualityIO._fastq_illumina_convert_fastq_illumina,
+    ("fastq", "qual"): QualityIO._fastq_sanger_convert_qual,
+    ("fastq-sanger", "qual"): QualityIO._fastq_sanger_convert_qual,
+    ("fastq-solexa", "qual"): QualityIO._fastq_solexa_convert_qual,
+    ("fastq-illumina", "qual"): QualityIO._fastq_illumina_convert_qual,
+}
+
+
+def convert(in_file, in_format, out_file, out_format, molecule_type=None):
+    """Convert between two sequence file formats, return number of records.
+
+    Arguments:
+     - in_file - an input handle or filename
+     - in_format - input file format, lower case string
+     - out_file - an output handle or filename
+     - out_format - output file format, lower case string
+     - molecule_type - optional molecule type to apply, string containing
+       "DNA", "RNA" or "protein".
+
+    **NOTE** - If you provide an output filename, it will be opened which will
+    overwrite any existing file without warning.
+
+    The idea here is that while doing this will work::
+
+        from Bio import SeqIO
+        records = SeqIO.parse(in_handle, in_format)
+        count = SeqIO.write(records, out_handle, out_format)
+
+    it is shorter to write::
+
+        from Bio import SeqIO
+        count = SeqIO.convert(in_handle, in_format, out_handle, out_format)
+
+    Also, Bio.SeqIO.convert is faster for some conversions as it can make some
+    optimisations.
+
+    For example, going from a filename to a handle:
+
+    >>> from Bio import SeqIO
+    >>> from io import StringIO
+    >>> handle = StringIO("")
+    >>> SeqIO.convert("Quality/example.fastq", "fastq", handle, "fasta")
+    3
+    >>> print(handle.getvalue())
+    >EAS54_6_R1_2_1_413_324
+    CCCTTCTTGTCTTCAGCGTTTCTCC
+    >EAS54_6_R1_2_1_540_792
+    TTGGCAGGCCAAGGCCGATGGATCA
+    >EAS54_6_R1_2_1_443_348
+    GTTGCTTCTGGCGTGGGTGGGGGGG
+    
+
+    Note some formats like SeqXML require you to specify the molecule type
+    when it cannot be determined by the parser:
+
+    >>> from Bio import SeqIO
+    >>> from io import BytesIO
+    >>> handle = BytesIO()
+    >>> SeqIO.convert("Quality/example.fastq", "fastq", handle, "seqxml", "DNA")
+    3
+    """
+    if molecule_type:
+        if not isinstance(molecule_type, str):
+            raise TypeError("Molecule type should be a string, not %r" % molecule_type)
+        elif (
+            "DNA" in molecule_type
+            or "RNA" in molecule_type
+            or "protein" in molecule_type
+        ):
+            pass
+        else:
+            raise ValueError("Unexpected molecule type, %r" % molecule_type)
+    f = _converter.get((in_format, out_format))
+    if f:
+        count = f(in_file, out_file)
+    else:
+        records = parse(in_file, in_format)
+        if molecule_type:
+            # Edit the records on the fly to set molecule type
+
+            def over_ride(record):
+                """Over-ride molecule in-place."""
+                record.annotations["molecule_type"] = molecule_type
+                return record
+
+            records = (over_ride(_) for _ in records)
+        count = write(records, out_file, out_format)
+    return count
+
+
+if __name__ == "__main__":
+    from Bio._utils import run_doctest
+
+    run_doctest()
diff --git a/code/lib/Bio/SeqIO/__pycache__/AbiIO.cpython-37.pyc b/code/lib/Bio/SeqIO/__pycache__/AbiIO.cpython-37.pyc
new file mode 100644
index 0000000..c8d472e
Binary files /dev/null and b/code/lib/Bio/SeqIO/__pycache__/AbiIO.cpython-37.pyc differ
diff --git a/code/lib/Bio/SeqIO/__pycache__/AceIO.cpython-37.pyc b/code/lib/Bio/SeqIO/__pycache__/AceIO.cpython-37.pyc
new file mode 100644
index 0000000..fa2f285
Binary files /dev/null and b/code/lib/Bio/SeqIO/__pycache__/AceIO.cpython-37.pyc differ
diff --git a/code/lib/Bio/SeqIO/__pycache__/FastaIO.cpython-37.pyc b/code/lib/Bio/SeqIO/__pycache__/FastaIO.cpython-37.pyc
new file mode 100644
index 0000000..042a384
Binary files /dev/null and b/code/lib/Bio/SeqIO/__pycache__/FastaIO.cpython-37.pyc differ
diff --git a/code/lib/Bio/SeqIO/__pycache__/GckIO.cpython-37.pyc b/code/lib/Bio/SeqIO/__pycache__/GckIO.cpython-37.pyc
new file mode 100644
index 0000000..93cbcde
Binary files /dev/null and b/code/lib/Bio/SeqIO/__pycache__/GckIO.cpython-37.pyc differ
diff --git a/code/lib/Bio/SeqIO/__pycache__/IgIO.cpython-37.pyc b/code/lib/Bio/SeqIO/__pycache__/IgIO.cpython-37.pyc
new file mode 100644
index 0000000..a874469
Binary files /dev/null and b/code/lib/Bio/SeqIO/__pycache__/IgIO.cpython-37.pyc differ
diff --git a/code/lib/Bio/SeqIO/__pycache__/InsdcIO.cpython-37.pyc b/code/lib/Bio/SeqIO/__pycache__/InsdcIO.cpython-37.pyc
new file mode 100644
index 0000000..f24aec4
Binary files /dev/null and b/code/lib/Bio/SeqIO/__pycache__/InsdcIO.cpython-37.pyc differ
diff --git a/code/lib/Bio/SeqIO/__pycache__/Interfaces.cpython-37.pyc b/code/lib/Bio/SeqIO/__pycache__/Interfaces.cpython-37.pyc
new file mode 100644
index 0000000..4c8a80f
Binary files /dev/null and b/code/lib/Bio/SeqIO/__pycache__/Interfaces.cpython-37.pyc differ
diff --git a/code/lib/Bio/SeqIO/__pycache__/NibIO.cpython-37.pyc b/code/lib/Bio/SeqIO/__pycache__/NibIO.cpython-37.pyc
new file mode 100644
index 0000000..8e10f6c
Binary files /dev/null and b/code/lib/Bio/SeqIO/__pycache__/NibIO.cpython-37.pyc differ
diff --git a/code/lib/Bio/SeqIO/__pycache__/PdbIO.cpython-37.pyc b/code/lib/Bio/SeqIO/__pycache__/PdbIO.cpython-37.pyc
new file mode 100644
index 0000000..89efc3b
Binary files /dev/null and b/code/lib/Bio/SeqIO/__pycache__/PdbIO.cpython-37.pyc differ
diff --git a/code/lib/Bio/SeqIO/__pycache__/PhdIO.cpython-37.pyc b/code/lib/Bio/SeqIO/__pycache__/PhdIO.cpython-37.pyc
new file mode 100644
index 0000000..e9c0140
Binary files /dev/null and b/code/lib/Bio/SeqIO/__pycache__/PhdIO.cpython-37.pyc differ
diff --git a/code/lib/Bio/SeqIO/__pycache__/PirIO.cpython-37.pyc b/code/lib/Bio/SeqIO/__pycache__/PirIO.cpython-37.pyc
new file mode 100644
index 0000000..6c94458
Binary files /dev/null and b/code/lib/Bio/SeqIO/__pycache__/PirIO.cpython-37.pyc differ
diff --git a/code/lib/Bio/SeqIO/__pycache__/QualityIO.cpython-37.pyc b/code/lib/Bio/SeqIO/__pycache__/QualityIO.cpython-37.pyc
new file mode 100644
index 0000000..8c06475
Binary files /dev/null and b/code/lib/Bio/SeqIO/__pycache__/QualityIO.cpython-37.pyc differ
diff --git a/code/lib/Bio/SeqIO/__pycache__/SeqXmlIO.cpython-37.pyc b/code/lib/Bio/SeqIO/__pycache__/SeqXmlIO.cpython-37.pyc
new file mode 100644
index 0000000..51dc7db
Binary files /dev/null and b/code/lib/Bio/SeqIO/__pycache__/SeqXmlIO.cpython-37.pyc differ
diff --git a/code/lib/Bio/SeqIO/__pycache__/SffIO.cpython-37.pyc b/code/lib/Bio/SeqIO/__pycache__/SffIO.cpython-37.pyc
new file mode 100644
index 0000000..7c8f35b
Binary files /dev/null and b/code/lib/Bio/SeqIO/__pycache__/SffIO.cpython-37.pyc differ
diff --git a/code/lib/Bio/SeqIO/__pycache__/SnapGeneIO.cpython-37.pyc b/code/lib/Bio/SeqIO/__pycache__/SnapGeneIO.cpython-37.pyc
new file mode 100644
index 0000000..60c0ede
Binary files /dev/null and b/code/lib/Bio/SeqIO/__pycache__/SnapGeneIO.cpython-37.pyc differ
diff --git a/code/lib/Bio/SeqIO/__pycache__/SwissIO.cpython-37.pyc b/code/lib/Bio/SeqIO/__pycache__/SwissIO.cpython-37.pyc
new file mode 100644
index 0000000..180c01b
Binary files /dev/null and b/code/lib/Bio/SeqIO/__pycache__/SwissIO.cpython-37.pyc differ
diff --git a/code/lib/Bio/SeqIO/__pycache__/TabIO.cpython-37.pyc b/code/lib/Bio/SeqIO/__pycache__/TabIO.cpython-37.pyc
new file mode 100644
index 0000000..402ee74
Binary files /dev/null and b/code/lib/Bio/SeqIO/__pycache__/TabIO.cpython-37.pyc differ
diff --git a/code/lib/Bio/SeqIO/__pycache__/TwoBitIO.cpython-37.pyc b/code/lib/Bio/SeqIO/__pycache__/TwoBitIO.cpython-37.pyc
new file mode 100644
index 0000000..4611e38
Binary files /dev/null and b/code/lib/Bio/SeqIO/__pycache__/TwoBitIO.cpython-37.pyc differ
diff --git a/code/lib/Bio/SeqIO/__pycache__/UniprotIO.cpython-37.pyc b/code/lib/Bio/SeqIO/__pycache__/UniprotIO.cpython-37.pyc
new file mode 100644
index 0000000..dcb0cc7
Binary files /dev/null and b/code/lib/Bio/SeqIO/__pycache__/UniprotIO.cpython-37.pyc differ
diff --git a/code/lib/Bio/SeqIO/__pycache__/XdnaIO.cpython-37.pyc b/code/lib/Bio/SeqIO/__pycache__/XdnaIO.cpython-37.pyc
new file mode 100644
index 0000000..5ef7092
Binary files /dev/null and b/code/lib/Bio/SeqIO/__pycache__/XdnaIO.cpython-37.pyc differ
diff --git a/code/lib/Bio/SeqIO/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/SeqIO/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000..7119cbc
Binary files /dev/null and b/code/lib/Bio/SeqIO/__pycache__/__init__.cpython-37.pyc differ
diff --git a/code/lib/Bio/SeqIO/__pycache__/_index.cpython-37.pyc b/code/lib/Bio/SeqIO/__pycache__/_index.cpython-37.pyc
new file mode 100644
index 0000000..2d5b738
Binary files /dev/null and b/code/lib/Bio/SeqIO/__pycache__/_index.cpython-37.pyc differ
diff --git a/code/lib/Bio/SeqIO/_index.py b/code/lib/Bio/SeqIO/_index.py
new file mode 100644
index 0000000..560b1c2
--- /dev/null
+++ b/code/lib/Bio/SeqIO/_index.py
@@ -0,0 +1,713 @@
+# Copyright 2009-2011 by Peter Cock.  All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Dictionary like indexing of sequence files (PRIVATE).
+
+You are not expected to access this module, or any of its code, directly. This
+is all handled internally by the Bio.SeqIO.index(...) and index_db(...)
+functions which are the public interface for this functionality.
+
+The basic idea is that we scan over a sequence file, looking for new record
+markers. We then try to extract the string that Bio.SeqIO.parse/read would
+use as the record id, ideally without actually parsing the full record. We
+then use a subclassed Python dictionary to record the file offset for the
+record start against the record id.
+
+Note that this means full parsing is on demand, so any invalid or problem
+record may not trigger an exception until it is accessed. This is by design.
+
+This means our dictionary like objects have in memory ALL the keys (all the
+record identifiers), which shouldn't be a problem even with second generation
+sequencing. If memory is an issue, the index_db(...) interface stores the
+keys and offsets in an SQLite database - which can be re-used to avoid
+re-indexing the file the next time it is needed.
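+
+As an illustrative sketch of the idea (the names here are invented, not the
+real internals)::
+
+    offsets = {"recA": 0, "recB": 1024}   # record id -> file offset
+    handle.seek(offsets["recB"])          # jump straight to that record
+    record = next(SeqIO.parse(handle, "fasta"))
+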
+"""
+import re
+
+from io import BytesIO
+from io import StringIO
+
+from Bio import SeqIO
+from Bio.File import _IndexedSeqFileProxy
+from Bio.File import _open_for_random_access
+
+
+class SeqFileRandomAccess(_IndexedSeqFileProxy):
+    """Base class for defining random access to sequence files."""
+
+    def __init__(self, filename, format):
+        """Initialize the class."""
+        self._handle = _open_for_random_access(filename)
+        self._format = format
+        # Load the parser class/function once and avoid the dict lookup in each
+        # __getitem__ call:
+        self._iterator = SeqIO._FormatToIterator[format]
+
+    def get(self, offset):
+        """Return SeqRecord."""
+        # Should be overridden for binary file formats etc:
+        return next(self._iterator(StringIO(self.get_raw(offset).decode())))
+
+
+####################
+# Special indexers #
+####################
+# Anything where the records cannot be read simply by parsing from
+# the record start. For example, anything requiring information from
+# a file header - e.g. SFF files where we would need to know the
+# number of flows.
+class SffRandomAccess(SeqFileRandomAccess):
+    """Random access to a Standard Flowgram Format (SFF) file."""
+
+    def __init__(self, filename, format):
+        """Initialize the class."""
+        SeqFileRandomAccess.__init__(self, filename, format)
+        (
+            header_length,
+            index_offset,
+            index_length,
+            number_of_reads,
+            self._flows_per_read,
+            self._flow_chars,
+            self._key_sequence,
+        ) = SeqIO.SffIO._sff_file_header(self._handle)
+
+    def __iter__(self):
+        """Load any index block in the file, or build it the slow way (PRIVATE)."""
+        handle = self._handle
+        handle.seek(0)
+        # Already did this in __init__ but need the handle in the right place
+        (
+            header_length,
+            index_offset,
+            index_length,
+            number_of_reads,
+            self._flows_per_read,
+            self._flow_chars,
+            self._key_sequence,
+        ) = SeqIO.SffIO._sff_file_header(handle)
+        if index_offset and index_length:
+            # There is an index provided, try this the fast way:
+            count = 0
+            max_offset = 0
+            try:
+                for name, offset in SeqIO.SffIO._sff_read_roche_index(handle):
+                    max_offset = max(max_offset, offset)
+                    yield name, offset, 0
+                    count += 1
+                if count != number_of_reads:
+                    raise ValueError(
+                        "Indexed %i records, expected %i" % (count, number_of_reads)
+                    )
+                # If that worked, call _check_eof ...
+            except ValueError as err:
+                import warnings
+                from Bio import BiopythonParserWarning
+
+                warnings.warn(
+                    "Could not parse the SFF index: %s" % err, BiopythonParserWarning
+                )
+                assert count == 0, "Partially populated index"
+                handle.seek(0)
+                # Drop out to the slow way...
+            else:
+                # Fast way worked, check EOF
+                if index_offset + index_length <= max_offset:
+                    # Can have an index at start (or mid-file)
+                    handle.seek(max_offset)
+                    # Parse the final read,
+                    SeqIO.SffIO._sff_read_raw_record(handle, self._flows_per_read)
+                    # Should now be at the end of the file!
+                SeqIO.SffIO._check_eof(handle, index_offset, index_length)
+                return
+        # We used to give a warning in this case, but Ion Torrent's
+        # SFF files don't have an index so that would be annoying.
+        # Fall back on the slow way!
+        count = 0
+        for name, offset in SeqIO.SffIO._sff_do_slow_index(handle):
+            yield name, offset, 0
+            count += 1
+        if count != number_of_reads:
+            raise ValueError(
+                "Indexed %i records, expected %i" % (count, number_of_reads)
+            )
+        SeqIO.SffIO._check_eof(handle, index_offset, index_length)
+
+    def get(self, offset):
+        """Return the SeqRecord starting at the given offset."""
+        handle = self._handle
+        handle.seek(offset)
+        return SeqIO.SffIO._sff_read_seq_record(
+            handle, self._flows_per_read, self._flow_chars, self._key_sequence,
+        )
+
+    def get_raw(self, offset):
+        """Return the raw record from the file as a bytes string."""
+        handle = self._handle
+        handle.seek(offset)
+        return SeqIO.SffIO._sff_read_raw_record(handle, self._flows_per_read)
+
+
+class SffTrimedRandomAccess(SffRandomAccess):
+    """Random access to an SFF file with defined trimming applied to each sequence."""
+
+    def get(self, offset):
+        """Return the SeqRecord starting at the given offset."""
+        handle = self._handle
+        handle.seek(offset)
+        return SeqIO.SffIO._sff_read_seq_record(
+            handle,
+            self._flows_per_read,
+            self._flow_chars,
+            self._key_sequence,
+            trim=True,
+        )
+
+
+###################
+# Simple indexers #
+###################
+
+
+class SequentialSeqFileRandomAccess(SeqFileRandomAccess):
+    """Random access to a simple sequential sequence file."""
+
+    def __init__(self, filename, format):
+        """Initialize the class."""
+        SeqFileRandomAccess.__init__(self, filename, format)
+        marker = {
+            "ace": b"CO ",
+            "embl": b"ID ",
+            "fasta": b">",
+            "genbank": b"LOCUS ",
+            "gb": b"LOCUS ",
+            "imgt": b"ID ",
+            "phd": b"BEGIN_SEQUENCE",
+            "pir": b">..;",
+            "qual": b">",
+            "swiss": b"ID ",
+            "uniprot-xml": b" end of this record
+                break
+            lines.append(line)
+        return b"".join(lines)
+
+
+#######################################
+# Fiddly indexers: GenBank, EMBL, ... #
+#######################################
+
+
+class GenBankRandomAccess(SequentialSeqFileRandomAccess):
+    """Indexed dictionary like access to a GenBank file."""
+
+    def __iter__(self):
+        """Iterate over the sequence records in the file."""
+        handle = self._handle
+        handle.seek(0)
+        marker_re = self._marker_re
+        accession_marker = b"ACCESSION "
+        version_marker = b"VERSION "
+        # Skip any header before first record
+        while True:
+            start_offset = handle.tell()
+            line = handle.readline()
+            if marker_re.match(line) or not line:
+                break
+        # Should now be at the start of a record, or end of the file
+        while marker_re.match(line):
+            # We cannot assume the record.id is the first word after LOCUS,
+            # normally the first entry on the VERSION or ACCESSION line is used.
+            # However, if both are missing, the GenBank parser falls back on
+            # the LOCUS entry.
+            try:
+                key = line[5:].split(None, 1)[0]
+            except ValueError:
+                # Warning?
+                # No content in LOCUS line
+                key = None
+            length = len(line)
+            while True:
+                end_offset = handle.tell()
+                line = handle.readline()
+                if marker_re.match(line) or not line:
+                    if not key:
+                        raise ValueError(
+                            "Did not find usable ACCESSION/VERSION/LOCUS lines"
+                        )
+                    yield key.decode(), start_offset, length
+                    start_offset = end_offset
+                    break
+                elif line.startswith(accession_marker):
+                    try:
+                        key = line.rstrip().split()[1]
+                    except IndexError:
+                        # No content in ACCESSION line
+                        pass
+                elif line.startswith(version_marker):
+                    try:
+                        version_id = line.rstrip().split()[1]
+                        if (
+                            version_id.count(b".") == 1
+                            and version_id.split(b".")[1].isdigit()
+                        ):
+                            # This should mimic the GenBank parser...
+                            key = version_id
+                    except IndexError:
+                        # No content in VERSION line
+                        pass
+
+                length += len(line)
+        assert not line, repr(line)
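+
+# Key-selection sketch for a typical GenBank header:
+#
+#     LOCUS       SCU49845     5028 bp    DNA       PLN       21-JUN-1999
+#     ACCESSION   U49845
+#     VERSION     U49845.1
+#
+# The yielded key is "U49845.1": a well-formed VERSION overrides ACCESSION,
+# which in turn overrides the LOCUS fallback, mirroring the record.id chosen
+# by the GenBank parser.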
+
+
+class EmblRandomAccess(SequentialSeqFileRandomAccess):
+    """Indexed dictionary like access to an EMBL file."""
+
+    def __iter__(self):
+        """Iterate over the sequence records in the file."""
+        handle = self._handle
+        handle.seek(0)
+        marker_re = self._marker_re
+        sv_marker = b"SV "
+        ac_marker = b"AC "
+        # Skip any header before first record
+        while True:
+            start_offset = handle.tell()
+            line = handle.readline()
+            if marker_re.match(line) or not line:
+                break
+        # Should now be at the start of a record, or end of the file
+        while marker_re.match(line):
+            # We cannot assume the record.id is the first word after ID,
+            # normally the SV line is used.
+            setbysv = False  # Tracks whether the key came from the SV field
+            length = len(line)
+            if line[2:].count(b";") in [5, 6]:
+                # Looks like the semicolon separated style introduced in 2006
+                # Or style from IPD-IMGT/HLA after their v3.16.0 release
+                parts = line[3:].rstrip().split(b";")
+                if parts[1].strip().startswith(sv_marker):
+                    # The SV bit gives the version
+                    key = parts[0].strip() + b"." + parts[1].strip().split()[1]
+                    setbysv = True
+                else:
+                    key = parts[0].strip()
+            elif line[2:].count(b";") in [2, 3]:
+                # Looks like the pre 2006 style, take first word only
+                # Or, with two colons, the KIPO patent variation
+                key = line[3:].strip().split(None, 1)[0]
+                if key.endswith(b";"):
+                    key = key[:-1]
+            else:
+                raise ValueError("Did not recognise the ID line layout:\n%r" % line)
+            while True:
+                line = handle.readline()
+                if marker_re.match(line) or not line:
+                    end_offset = handle.tell() - len(line)
+                    yield key.decode(), start_offset, length
+                    start_offset = end_offset
+                    break
+                elif line.startswith(ac_marker) and not setbysv:
+                    key = line.rstrip().split()[1]
+                    if key.endswith(b";"):
+                        key = key[:-1]
+                elif line.startswith(sv_marker):
+                    key = line.rstrip().split()[1]
+                    setbysv = True
+                length += len(line)
+        assert not line, repr(line)
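+
+# ID-line sketch: the semicolon-separated style
+#
+#     ID   X56734; SV 1; linear; mRNA; STD; PLN; 1859 BP.
+#
+# yields the key "X56734.1" (accession plus SV version), while the pre-2006
+# style
+#
+#     ID   X56734     standard; RNA; PLN; 1859 BP.
+#
+# yields just "X56734" (the first word of the ID line).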
+
+
+class SwissRandomAccess(SequentialSeqFileRandomAccess):
+    """Random access to a SwissProt file."""
+
+    def __iter__(self):
+        """Iterate over the sequence records in the file."""
+        handle = self._handle
+        handle.seek(0)
+        marker_re = self._marker_re
+        # Skip any header before first record
+        while True:
+            start_offset = handle.tell()
+            line = handle.readline()
+            if marker_re.match(line) or not line:
+                break
+        # Should now be at the start of a record, or end of the file
+        while marker_re.match(line):
+            length = len(line)
+            # We cannot assume the record.id is the first word after ID,
+            # normally the following AC line is used.
+            line = handle.readline()
+            length += len(line)
+            assert line.startswith(b"AC ")
+            key = line[3:].strip().split(b";")[0].strip()
+            while True:
+                end_offset = handle.tell()
+                line = handle.readline()
+                if marker_re.match(line) or not line:
+                    yield key.decode(), start_offset, length
+                    start_offset = end_offset
+                    break
+                length += len(line)
+        assert not line, repr(line)
+
+
+class UniprotRandomAccess(SequentialSeqFileRandomAccess):
+    """Random access to a UniProt XML file."""
+
+    def __iter__(self):
+        """Iterate over the sequence records in the file."""
+        handle = self._handle
+        handle.seek(0)
+        marker_re = self._marker_re
+        start_acc_marker = b"<accession>"
+        end_acc_marker = b"</accession>"
+        end_entry_marker = b"</entry>"
+        # Skip any header before first record
+        while True:
+            start_offset = handle.tell()
+            line = handle.readline()
+            if marker_re.match(line) or not line:
+                break
+        # Should now be at the start of a record, or end of the file
+        while marker_re.match(line):
+            length = len(line)
+            # We expect the next line to be <accession>xxx</accession>
+            # (possibly with leading spaces)
+            # but allow it to be later on within the <entry>
+            key = None
+            while True:
+                line = handle.readline()
+                if key is None and start_acc_marker in line:
+                    assert end_acc_marker in line, line
+                    key = line[line.find(start_acc_marker) + 11 :].split(b"<", 1)[0]
+                    length += len(line)
+                elif end_entry_marker in line:
+                    length += line.find(end_entry_marker) + 8
+                    end_offset = (
+                        handle.tell() - len(line) + line.find(end_entry_marker) + 8
+                    )
+                    assert start_offset + length == end_offset
+                    break
+                elif marker_re.match(line) or not line:
+                    # Start of next record or end of file
+                    raise ValueError("Didn't find end of record")
+                else:
+                    length += len(line)
+            if not key:
+                raise ValueError(
+                    "Did not find  line in bytes %i to %i"
+                    % (start_offset, start_offset + length)
+                )
+            yield key.decode(), start_offset, length
+            # Find start of next record
+            while not marker_re.match(line) and line:
+                start_offset = handle.tell()
+                line = handle.readline()
+        assert not line, repr(line)
+
+    def get_raw(self, offset):
+        """Return the raw record from the file as a bytes string."""
+        handle = self._handle
+        marker_re = self._marker_re
+        end_entry_marker = b"</entry>"
+        handle.seek(offset)
+        data = [handle.readline()]
+        while True:
+            line = handle.readline()
+            i = line.find(end_entry_marker)
+            if i != -1:
+                data.append(line[: i + 8])
+                break
+            if marker_re.match(line) or not line:
+                # End of file, or start of next record
+                raise ValueError("Didn't find end of record")
+            data.append(line)
+        return b"".join(data)
+
+    def get(self, offset):
+        """Return the SeqRecord starting at the given offset."""
+        # TODO - Can we handle this directly in the parser?
+        # This is a hack - use get_raw for <entry>...</entry> and wrap it with
+        # the apparently required XML header and footer.
+        data = (
+            b"""
+        
+        """
+            + self.get_raw(offset)
+            + b""
+        )
+        return next(SeqIO.UniprotIO.UniprotIterator(BytesIO(data)))
+
+
+class IntelliGeneticsRandomAccess(SeqFileRandomAccess):
+    """Random access to a IntelliGenetics file."""
+
+    def __init__(self, filename, format):
+        """Initialize the class."""
+        SeqFileRandomAccess.__init__(self, filename, format)
+        self._marker_re = re.compile(b"^;")
+
+    def __iter__(self):
+        """Iterate over the sequence records in the file."""
+        handle = self._handle
+        handle.seek(0)
+        # Skip any header
+        offset = 0
+        line = ""
+        while True:
+            offset += len(line)
+            line = handle.readline()
+            if not line:
+                break  # Premature end of file, or just empty?
+            if not line.startswith(b";;"):
+                break
+        while line:
+            length = 0
+            assert offset + len(line) == handle.tell()
+            if not line.startswith(b";"):
+                raise ValueError("Records should start with ';' and not:\n%r" % line)
+            while line.startswith(b";"):
+                length += len(line)
+                line = handle.readline()
+            key = line.rstrip()
+            # Now look for the first line which starts with ";"
+            while line and not line.startswith(b";"):
+                length += len(line)
+                line = handle.readline()
+            yield key.decode(), offset, length
+            offset += length
+            assert offset + len(line) == handle.tell()
+
+    def get_raw(self, offset):
+        """Return the raw record from the file as a bytes string."""
+        handle = self._handle
+        handle.seek(offset)
+        marker_re = self._marker_re
+        lines = []
+        line = handle.readline()
+        while line.startswith(b";"):
+            lines.append(line)
+            line = handle.readline()
+        while line and not line.startswith(b";"):
+            lines.append(line)
+            line = handle.readline()
+        return b"".join(lines)
+
+
+class TabRandomAccess(SeqFileRandomAccess):
+    """Random access to a simple tabbed file."""
+
+    def __iter__(self):
+        """Iterate over the sequence records in the file."""
+        handle = self._handle
+        handle.seek(0)
+        tab_char = b"\t"
+        while True:
+            start_offset = handle.tell()
+            line = handle.readline()
+            if not line:
+                break  # End of file
+            try:
+                key = line.split(tab_char)[0]
+            except ValueError:
+                if not line.strip():
+                    # Ignore blank lines
+                    continue
+                else:
+                    raise
+            else:
+                yield key.decode(), start_offset, len(line)
+
+    def get_raw(self, offset):
+        """Return the raw record from the file as a bytes string."""
+        handle = self._handle
+        handle.seek(offset)
+        return handle.readline()
+
+
+##########################
+# Now the FASTQ indexers #
+##########################
+
+
+class FastqRandomAccess(SeqFileRandomAccess):
+    """Random access to a FASTQ file (any supported variant).
+
+    With FASTQ the records all start with a "@" line, but so can quality lines.
+    Note this will cope with line-wrapped FASTQ files.
+    """
+
+    def __iter__(self):
+        """Iterate over the sequence records in the file."""
+        handle = self._handle
+        handle.seek(0)
+        id = None
+        start_offset = handle.tell()
+        line = handle.readline()
+        if not line:
+            # Empty file!
+            return
+        if line[0:1] != b"@":
+            raise ValueError("Problem with FASTQ @ line:\n%r" % line)
+        while line:
+            # assert line[0]=="@"
+            # This record seems OK (so far)
+            id = line[1:].rstrip().split(None, 1)[0]
+            # Find the seq line(s)
+            seq_len = 0
+            length = len(line)
+            while line:
+                line = handle.readline()
+                length += len(line)
+                if line.startswith(b"+"):
+                    break
+                seq_len += len(line.strip())
+            if not line:
+                raise ValueError("Premature end of file in seq section")
+            # assert line[0]=="+"
+            # Find the qual line(s)
+            qual_len = 0
+            while line:
+                if seq_len == qual_len:
+                    if seq_len == 0:
+                        # Special case, quality line should be just "\n"
+                        line = handle.readline()
+                        if line.strip():
+                            raise ValueError(
+                                "Expected blank quality line, not %r" % line
+                            )
+                        length += len(line)  # Need to include the blank line
+                    # Should be end of record...
+                    end_offset = handle.tell()
+                    line = handle.readline()
+                    if line and line[0:1] != b"@":
+                        raise ValueError("Problem with line %r" % line)
+                    break
+                else:
+                    line = handle.readline()
+                    qual_len += len(line.strip())
+                    length += len(line)
+            if seq_len != qual_len:
+                raise ValueError("Problem with quality section")
+            yield id.decode(), start_offset, length
+            start_offset = end_offset
+        # print("EOF")
+
+    def get_raw(self, offset):
+        """Return the raw record from the file as a bytes string."""
+        # TODO - Refactor this and the __iter__ method to reduce code duplication?
+        handle = self._handle
+        handle.seek(offset)
+        line = handle.readline()
+        data = line
+        if line[0:1] != b"@":
+            raise ValueError("Problem with FASTQ @ line:\n%r" % line)
+        # Find the seq line(s)
+        seq_len = 0
+        while line:
+            line = handle.readline()
+            data += line
+            if line.startswith(b"+"):
+                break
+            seq_len += len(line.strip())
+        if not line:
+            raise ValueError("Premature end of file in seq section")
+        assert line[0:1] == b"+"
+        # Find the qual line(s)
+        qual_len = 0
+        while line:
+            if seq_len == qual_len:
+                if seq_len == 0:
+                    # Special case, quality line should be just "\n"
+                    line = handle.readline()
+                    if line.strip():
+                        raise ValueError("Expected blank quality line, not %r" % line)
+                    data += line
+                # Should be end of record...
+                line = handle.readline()
+                if line and line[0:1] != b"@":
+                    raise ValueError("Problem with line %r" % line)
+                break
+            else:
+                line = handle.readline()
+                data += line
+                qual_len += len(line.strip())
+        if seq_len != qual_len:
+            raise ValueError("Problem with quality section")
+        return data
+
+
+###############################################################################
+
+_FormatToRandomAccess = {
+    "ace": SequentialSeqFileRandomAccess,
+    "embl": EmblRandomAccess,
+    "fasta": SequentialSeqFileRandomAccess,
+    "fastq": FastqRandomAccess,  # Class handles all three variants
+    "fastq-sanger": FastqRandomAccess,  # alias of the above
+    "fastq-solexa": FastqRandomAccess,
+    "fastq-illumina": FastqRandomAccess,
+    "genbank": GenBankRandomAccess,
+    "gb": GenBankRandomAccess,  # alias of the above
+    "ig": IntelliGeneticsRandomAccess,
+    "imgt": EmblRandomAccess,
+    "phd": SequentialSeqFileRandomAccess,
+    "pir": SequentialSeqFileRandomAccess,
+    "sff": SffRandomAccess,
+    "sff-trim": SffTrimedRandomAccess,
+    "swiss": SwissRandomAccess,
+    "tab": TabRandomAccess,
+    "qual": SequentialSeqFileRandomAccess,
+    "uniprot-xml": UniprotRandomAccess,
+}
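+
+# Dispatch sketch: Bio.SeqIO's index machinery looks the proxy class up by
+# format name, e.g.
+#
+#     proxy_class = _FormatToRandomAccess["fastq"]   # FastqRandomAccess
+#     proxy = proxy_class("example.fastq", "fastq")  # hypothetical file name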
diff --git a/code/lib/Bio/SeqIO/_twoBitIO.c b/code/lib/Bio/SeqIO/_twoBitIO.c
new file mode 100644
index 0000000..6bfa373
--- /dev/null
+++ b/code/lib/Bio/SeqIO/_twoBitIO.c
@@ -0,0 +1,480 @@
+#define PY_SSIZE_T_CLEAN
+#include "Python.h"
+
+
+static const char bases[][4] = {"TTTT",  /* 00 00 00 00 */
+                                "TTTC",  /* 00 00 00 01 */
+                                "TTTA",  /* 00 00 00 10 */
+                                "TTTG",  /* 00 00 00 11 */
+                                "TTCT",  /* 00 00 01 00 */
+                                "TTCC",  /* 00 00 01 01 */
+                                "TTCA",  /* 00 00 01 10 */
+                                "TTCG",  /* 00 00 01 11 */
+                                "TTAT",  /* 00 00 10 00 */
+                                "TTAC",  /* 00 00 10 01 */
+                                "TTAA",  /* 00 00 10 10 */
+                                "TTAG",  /* 00 00 10 11 */
+                                "TTGT",  /* 00 00 11 00 */
+                                "TTGC",  /* 00 00 11 01 */
+                                "TTGA",  /* 00 00 11 10 */
+                                "TTGG",  /* 00 00 11 11 */
+                                "TCTT",  /* 00 01 00 00 */
+                                "TCTC",  /* 00 01 00 01 */
+                                "TCTA",  /* 00 01 00 10 */
+                                "TCTG",  /* 00 01 00 11 */
+                                "TCCT",  /* 00 01 01 00 */
+                                "TCCC",  /* 00 01 01 01 */
+                                "TCCA",  /* 00 01 01 10 */
+                                "TCCG",  /* 00 01 01 11 */
+                                "TCAT",  /* 00 01 10 00 */
+                                "TCAC",  /* 00 01 10 01 */
+                                "TCAA",  /* 00 01 10 10 */
+                                "TCAG",  /* 00 01 10 11 */
+                                "TCGT",  /* 00 01 11 00 */
+                                "TCGC",  /* 00 01 11 01 */
+                                "TCGA",  /* 00 01 11 10 */
+                                "TCGG",  /* 00 01 11 11 */
+                                "TATT",  /* 00 10 00 00 */
+                                "TATC",  /* 00 10 00 01 */
+                                "TATA",  /* 00 10 00 10 */
+                                "TATG",  /* 00 10 00 11 */
+                                "TACT",  /* 00 10 01 00 */
+                                "TACC",  /* 00 10 01 01 */
+                                "TACA",  /* 00 10 01 10 */
+                                "TACG",  /* 00 10 01 11 */
+                                "TAAT",  /* 00 10 10 00 */
+                                "TAAC",  /* 00 10 10 01 */
+                                "TAAA",  /* 00 10 10 10 */
+                                "TAAG",  /* 00 10 10 11 */
+                                "TAGT",  /* 00 10 11 00 */
+                                "TAGC",  /* 00 10 11 01 */
+                                "TAGA",  /* 00 10 11 10 */
+                                "TAGG",  /* 00 10 11 11 */
+                                "TGTT",  /* 00 11 00 00 */
+                                "TGTC",  /* 00 11 00 01 */
+                                "TGTA",  /* 00 11 00 10 */
+                                "TGTG",  /* 00 11 00 11 */
+                                "TGCT",  /* 00 11 01 00 */
+                                "TGCC",  /* 00 11 01 01 */
+                                "TGCA",  /* 00 11 01 10 */
+                                "TGCG",  /* 00 11 01 11 */
+                                "TGAT",  /* 00 11 10 00 */
+                                "TGAC",  /* 00 11 10 01 */
+                                "TGAA",  /* 00 11 10 10 */
+                                "TGAG",  /* 00 11 10 11 */
+                                "TGGT",  /* 00 11 11 00 */
+                                "TGGC",  /* 00 11 11 01 */
+                                "TGGA",  /* 00 11 11 10 */
+                                "TGGG",  /* 00 11 11 11 */
+                                "CTTT",  /* 01 00 00 00 */
+                                "CTTC",  /* 01 00 00 01 */
+                                "CTTA",  /* 01 00 00 10 */
+                                "CTTG",  /* 01 00 00 11 */
+                                "CTCT",  /* 01 00 01 00 */
+                                "CTCC",  /* 01 00 01 01 */
+                                "CTCA",  /* 01 00 01 10 */
+                                "CTCG",  /* 01 00 01 11 */
+                                "CTAT",  /* 01 00 10 00 */
+                                "CTAC",  /* 01 00 10 01 */
+                                "CTAA",  /* 01 00 10 10 */
+                                "CTAG",  /* 01 00 10 11 */
+                                "CTGT",  /* 01 00 11 00 */
+                                "CTGC",  /* 01 00 11 01 */
+                                "CTGA",  /* 01 00 11 10 */
+                                "CTGG",  /* 01 00 11 11 */
+                                "CCTT",  /* 01 01 00 00 */
+                                "CCTC",  /* 01 01 00 01 */
+                                "CCTA",  /* 01 01 00 10 */
+                                "CCTG",  /* 01 01 00 11 */
+                                "CCCT",  /* 01 01 01 00 */
+                                "CCCC",  /* 01 01 01 01 */
+                                "CCCA",  /* 01 01 01 10 */
+                                "CCCG",  /* 01 01 01 11 */
+                                "CCAT",  /* 01 01 10 00 */
+                                "CCAC",  /* 01 01 10 01 */
+                                "CCAA",  /* 01 01 10 10 */
+                                "CCAG",  /* 01 01 10 11 */
+                                "CCGT",  /* 01 01 11 00 */
+                                "CCGC",  /* 01 01 11 01 */
+                                "CCGA",  /* 01 01 11 10 */
+                                "CCGG",  /* 01 01 11 11 */
+                                "CATT",  /* 01 10 00 00 */
+                                "CATC",  /* 01 10 00 01 */
+                                "CATA",  /* 01 10 00 10 */
+                                "CATG",  /* 01 10 00 11 */
+                                "CACT",  /* 01 10 01 00 */
+                                "CACC",  /* 01 10 01 01 */
+                                "CACA",  /* 01 10 01 10 */
+                                "CACG",  /* 01 10 01 11 */
+                                "CAAT",  /* 01 10 10 00 */
+                                "CAAC",  /* 01 10 10 01 */
+                                "CAAA",  /* 01 10 10 10 */
+                                "CAAG",  /* 01 10 10 11 */
+                                "CAGT",  /* 01 10 11 00 */
+                                "CAGC",  /* 01 10 11 01 */
+                                "CAGA",  /* 01 10 11 10 */
+                                "CAGG",  /* 01 10 11 11 */
+                                "CGTT",  /* 01 11 00 00 */
+                                "CGTC",  /* 01 11 00 01 */
+                                "CGTA",  /* 01 11 00 10 */
+                                "CGTG",  /* 01 11 00 11 */
+                                "CGCT",  /* 01 11 01 00 */
+                                "CGCC",  /* 01 11 01 01 */
+                                "CGCA",  /* 01 11 01 10 */
+                                "CGCG",  /* 01 11 01 11 */
+                                "CGAT",  /* 01 11 10 00 */
+                                "CGAC",  /* 01 11 10 01 */
+                                "CGAA",  /* 01 11 10 10 */
+                                "CGAG",  /* 01 11 10 11 */
+                                "CGGT",  /* 01 11 11 00 */
+                                "CGGC",  /* 01 11 11 01 */
+                                "CGGA",  /* 01 11 11 10 */
+                                "CGGG",  /* 01 11 11 11 */
+                                "ATTT",  /* 10 00 00 00 */
+                                "ATTC",  /* 10 00 00 01 */
+                                "ATTA",  /* 10 00 00 10 */
+                                "ATTG",  /* 10 00 00 11 */
+                                "ATCT",  /* 10 00 01 00 */
+                                "ATCC",  /* 10 00 01 01 */
+                                "ATCA",  /* 10 00 01 10 */
+                                "ATCG",  /* 10 00 01 11 */
+                                "ATAT",  /* 10 00 10 00 */
+                                "ATAC",  /* 10 00 10 01 */
+                                "ATAA",  /* 10 00 10 10 */
+                                "ATAG",  /* 10 00 10 11 */
+                                "ATGT",  /* 10 00 11 00 */
+                                "ATGC",  /* 10 00 11 01 */
+                                "ATGA",  /* 10 00 11 10 */
+                                "ATGG",  /* 10 00 11 11 */
+                                "ACTT",  /* 10 01 00 00 */
+                                "ACTC",  /* 10 01 00 01 */
+                                "ACTA",  /* 10 01 00 10 */
+                                "ACTG",  /* 10 01 00 11 */
+                                "ACCT",  /* 10 01 01 00 */
+                                "ACCC",  /* 10 01 01 01 */
+                                "ACCA",  /* 10 01 01 10 */
+                                "ACCG",  /* 10 01 01 11 */
+                                "ACAT",  /* 10 01 10 00 */
+                                "ACAC",  /* 10 01 10 01 */
+                                "ACAA",  /* 10 01 10 10 */
+                                "ACAG",  /* 10 01 10 11 */
+                                "ACGT",  /* 10 01 11 00 */
+                                "ACGC",  /* 10 01 11 01 */
+                                "ACGA",  /* 10 01 11 10 */
+                                "ACGG",  /* 10 01 11 11 */
+                                "AATT",  /* 10 10 00 00 */
+                                "AATC",  /* 10 10 00 01 */
+                                "AATA",  /* 10 10 00 10 */
+                                "AATG",  /* 10 10 00 11 */
+                                "AACT",  /* 10 10 01 00 */
+                                "AACC",  /* 10 10 01 01 */
+                                "AACA",  /* 10 10 01 10 */
+                                "AACG",  /* 10 10 01 11 */
+                                "AAAT",  /* 10 10 10 00 */
+                                "AAAC",  /* 10 10 10 01 */
+                                "AAAA",  /* 10 10 10 10 */
+                                "AAAG",  /* 10 10 10 11 */
+                                "AAGT",  /* 10 10 11 00 */
+                                "AAGC",  /* 10 10 11 01 */
+                                "AAGA",  /* 10 10 11 10 */
+                                "AAGG",  /* 10 10 11 11 */
+                                "AGTT",  /* 10 11 00 00 */
+                                "AGTC",  /* 10 11 00 01 */
+                                "AGTA",  /* 10 11 00 10 */
+                                "AGTG",  /* 10 11 00 11 */
+                                "AGCT",  /* 10 11 01 00 */
+                                "AGCC",  /* 10 11 01 01 */
+                                "AGCA",  /* 10 11 01 10 */
+                                "AGCG",  /* 10 11 01 11 */
+                                "AGAT",  /* 10 11 10 00 */
+                                "AGAC",  /* 10 11 10 01 */
+                                "AGAA",  /* 10 11 10 10 */
+                                "AGAG",  /* 10 11 10 11 */
+                                "AGGT",  /* 10 11 11 00 */
+                                "AGGC",  /* 10 11 11 01 */
+                                "AGGA",  /* 10 11 11 10 */
+                                "AGGG",  /* 10 11 11 11 */
+                                "GTTT",  /* 11 00 00 00 */
+                                "GTTC",  /* 11 00 00 01 */
+                                "GTTA",  /* 11 00 00 10 */
+                                "GTTG",  /* 11 00 00 11 */
+                                "GTCT",  /* 11 00 01 00 */
+                                "GTCC",  /* 11 00 01 01 */
+                                "GTCA",  /* 11 00 01 10 */
+                                "GTCG",  /* 11 00 01 11 */
+                                "GTAT",  /* 11 00 10 00 */
+                                "GTAC",  /* 11 00 10 01 */
+                                "GTAA",  /* 11 00 10 10 */
+                                "GTAG",  /* 11 00 10 11 */
+                                "GTGT",  /* 11 00 11 00 */
+                                "GTGC",  /* 11 00 11 01 */
+                                "GTGA",  /* 11 00 11 10 */
+                                "GTGG",  /* 11 00 11 11 */
+                                "GCTT",  /* 11 01 00 00 */
+                                "GCTC",  /* 11 01 00 01 */
+                                "GCTA",  /* 11 01 00 10 */
+                                "GCTG",  /* 11 01 00 11 */
+                                "GCCT",  /* 11 01 01 00 */
+                                "GCCC",  /* 11 01 01 01 */
+                                "GCCA",  /* 11 01 01 10 */
+                                "GCCG",  /* 11 01 01 11 */
+                                "GCAT",  /* 11 01 10 00 */
+                                "GCAC",  /* 11 01 10 01 */
+                                "GCAA",  /* 11 01 10 10 */
+                                "GCAG",  /* 11 01 10 11 */
+                                "GCGT",  /* 11 01 11 00 */
+                                "GCGC",  /* 11 01 11 01 */
+                                "GCGA",  /* 11 01 11 10 */
+                                "GCGG",  /* 11 01 11 11 */
+                                "GATT",  /* 11 10 00 00 */
+                                "GATC",  /* 11 10 00 01 */
+                                "GATA",  /* 11 10 00 10 */
+                                "GATG",  /* 11 10 00 11 */
+                                "GACT",  /* 11 10 01 00 */
+                                "GACC",  /* 11 10 01 01 */
+                                "GACA",  /* 11 10 01 10 */
+                                "GACG",  /* 11 10 01 11 */
+                                "GAAT",  /* 11 10 10 00 */
+                                "GAAC",  /* 11 10 10 01 */
+                                "GAAA",  /* 11 10 10 10 */
+                                "GAAG",  /* 11 10 10 11 */
+                                "GAGT",  /* 11 10 11 00 */
+                                "GAGC",  /* 11 10 11 01 */
+                                "GAGA",  /* 11 10 11 10 */
+                                "GAGG",  /* 11 10 11 11 */
+                                "GGTT",  /* 11 11 00 00 */
+                                "GGTC",  /* 11 11 00 01 */
+                                "GGTA",  /* 11 11 00 10 */
+                                "GGTG",  /* 11 11 00 11 */
+                                "GGCT",  /* 11 11 01 00 */
+                                "GGCC",  /* 11 11 01 01 */
+                                "GGCA",  /* 11 11 01 10 */
+                                "GGCG",  /* 11 11 01 11 */
+                                "GGAT",  /* 11 11 10 00 */
+                                "GGAC",  /* 11 11 10 01 */
+                                "GGAA",  /* 11 11 10 10 */
+                                "GGAG",  /* 11 11 10 11 */
+                                "GGGT",  /* 11 11 11 00 */
+                                "GGGC",  /* 11 11 11 01 */
+                                "GGGA",  /* 11 11 11 10 */
+                                "GGGG",  /* 11 11 11 11 */
+                               };
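+
+/* Worked example: byte 0x1B is 00 01 10 11 as four 2-bit fields, so
+   bases[0x1B] is "TCAG" (T=00, C=01, A=10, G=11, most significant field
+   first). */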
+
+static int
+extract(const unsigned char* bytes, uint32_t byteSize, uint32_t start, uint32_t end, char sequence[]) {
+    uint32_t i;
+    const uint32_t size = end - start;
+    const uint32_t byteStart = start / 4;
+    const uint32_t byteEnd = (end + 3) / 4;
+
+    if (byteSize != byteEnd - byteStart) {
+        PyErr_Format(PyExc_RuntimeError,
+                     "unexpected number of bytes %u (expected %u)",
+                     byteSize, byteEnd - byteStart);
+        return -1;
+    }
+
+    start -= byteStart * 4;
+    if (byteStart + 1 == byteEnd) {
+        /* one byte only */
+        memcpy(sequence, &(bases[*bytes][start]), size);
+    }
+    else {
+        end -= byteEnd * 4;
+        /* end is now zero or negative, measuring the distance to the byte end */
+        memcpy(sequence, &(bases[*bytes][start]), 4 - start);
+        bytes++;
+        sequence += (4 - start);
+        for (i = byteStart+1; i < byteEnd-1; i++, bytes++, sequence += 4)
+            memcpy(sequence, bases[*bytes], 4);
+        memcpy(sequence, bases[*bytes], end + 4);
+    }
+    return 0;
+}
+
+static void
+applyNs(char sequence[], uint32_t start, uint32_t end, Py_buffer *nBlocks)
+{
+    const Py_ssize_t nBlockCount = nBlocks->shape[0];
+    const uint32_t* const nBlockPositions = nBlocks->buf;
+
+    Py_ssize_t i;
+    for (i = 0; i < nBlockCount; i++) {
+        uint32_t nBlockStart = nBlockPositions[2*i];
+        uint32_t nBlockEnd = nBlockPositions[2*i+1];
+        if (nBlockEnd < start) continue;
+        if (end < nBlockStart) break;
+        if (nBlockStart < start) nBlockStart = start;
+        if (end < nBlockEnd) nBlockEnd = end;
+        memset(sequence + nBlockStart - start, 'N', nBlockEnd - nBlockStart);
+    }
+}
+
+static void
+applyMask(char sequence[], uint32_t start, uint32_t end, Py_buffer* maskBlocks)
+{
+    const Py_ssize_t maskBlockCount = maskBlocks->shape[0];
+    const uint32_t* const maskBlockPositions = maskBlocks->buf;
+    const char diff = 'a' - 'A';
+
+    Py_ssize_t i;
+    for (i = 0; i < maskBlockCount; i++) {
+        uint32_t j;
+        uint32_t maskBlockStart = maskBlockPositions[2*i];
+        uint32_t maskBlockEnd = maskBlockPositions[2*i+1];
+        if (maskBlockEnd < start) continue;
+        if (end < maskBlockStart) break;
+        if (maskBlockStart < start) maskBlockStart = start;
+        if (end < maskBlockEnd) maskBlockEnd = end;
+        for (j = maskBlockStart - start; j < maskBlockEnd - start; j++)
+            sequence[j] += diff;
+    }
+}
+
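+/* Converter for the "O&" arguments of PyArg_ParseTupleAndKeywords below:
+   fills a Py_buffer with an (n, 2) C-contiguous uint32 array of
+   (start, end) positions. Returning Py_CLEANUP_SUPPORTED makes Python call
+   the converter once more with object == NULL, so the buffer is released
+   on both the success and error paths. */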
+static int
+blocks_converter(PyObject* object, void* pointer)
+{
+    const int flag = PyBUF_ND | PyBUF_FORMAT;
+    Py_buffer *view = pointer;
+
+    if (object == NULL) goto exit;
+
+    if (PyObject_GetBuffer(object, view, flag) == -1) {
+        PyErr_SetString(PyExc_RuntimeError, "blocks have unexpected format.");
+        return 0;
+    }
+
+    if (view->itemsize != sizeof(uint32_t)
+     || (strcmp(view->format, "I") != 0 && strcmp(view->format, "L") != 0 )) {
+        PyErr_Format(PyExc_RuntimeError,
+                     "blocks have incorrect data type (itemsize %zd, format %s)",
+                     view->itemsize, view->format);
+        goto exit;
+    }
+    if (view->ndim != 2) {
+        PyErr_Format(PyExc_RuntimeError,
+                     "blocks have incorrect rank %d (expected 2)", view->ndim);
+        goto exit;
+    }
+    if (view->shape[1] != 2) {
+        PyErr_Format(PyExc_RuntimeError,
+                     "blocks should have two colums (found %zd)",
+                     view->shape[1]);
+        goto exit;
+    }
+    return Py_CLEANUP_SUPPORTED;
+
+exit:
+    PyBuffer_Release(view);
+    return 0;
+}
+
+static char TwoBit_convert__doc__[] = "convert twoBit data to the DNA sequence, apply blocks of N's (representing unknown sequences) and masked (lower case) blocks, and return the sequence as a bytes object";
+
+static PyObject*
+TwoBit_convert(PyObject* self, PyObject* args, PyObject* keywords)
+{
+    const unsigned char *data;
+    Py_ssize_t start;
+    Py_ssize_t end;
+    Py_ssize_t step;
+    Py_ssize_t size;
+    Py_ssize_t length;
+    Py_buffer nBlocks;
+    Py_buffer maskBlocks;
+    PyObject *object;
+    char *sequence;
+
+    static char* kwlist[] = {"data", "start", "end", "step",
+                             "nBlocks", "maskBlocks", NULL};
+
+    if (!PyArg_ParseTupleAndKeywords(args, keywords, "y#nnnO&O&", kwlist,
+                                     &data, &length, &start, &end, &step,
+                                     &blocks_converter, &nBlocks,
+                                     &blocks_converter, &maskBlocks))
+        return NULL;
+
+    size = (end - start) / step;
+    object = PyBytes_FromStringAndSize(NULL, size);
+    if (!object) goto exit;
+
+    sequence = PyBytes_AS_STRING(object);
+
+    if (step == 1) {
+        if (extract(data, length, start, end, sequence) < 0) {
+            Py_DECREF(object);
+            object = NULL;
+            goto exit;
+        }
+        applyNs(sequence, start, end, &nBlocks);
+        applyMask(sequence, start, end, &maskBlocks);
+    }
+    else {
+        Py_ssize_t current, i;
+        Py_ssize_t full_start, full_end;
+        char* full_sequence;
+        if (start <= end) {
+            full_start = start;
+            full_end = end;
+            current = 0; /* first position in sequence */
+        }
+        else {
+            full_start = end + 1;
+            full_end = start + 1;
+            current = start - end - 1; /* last position in sequence */
+        }
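+        /* With a negative step the caller passes start > end: extract the
+           forward run [end+1, start+1) once, then copy from its tail
+           backwards (current starts at the last position and step < 0). */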
+        full_sequence = PyMem_Malloc((full_end-full_start+1)*sizeof(char));
+        if (!full_sequence) {
+            Py_DECREF(object);
+            object = NULL;
+            goto exit;
+        }
+        full_sequence[full_end-full_start] = '\0';
+        if (extract(data, length, full_start, full_end, full_sequence) < 0) {
+            PyMem_Free(full_sequence);
+            Py_DECREF(object);
+            object = NULL;
+            goto exit;
+        }
+        applyNs(full_sequence, full_start, full_end, &nBlocks);
+        applyMask(full_sequence, full_start, full_end, &maskBlocks);
+        for (i = 0; i < size; current += step, i++)
+            sequence[i] = full_sequence[current];
+        PyMem_Free(full_sequence);
+    }
+
+exit:
+    blocks_converter(NULL, &nBlocks);
+    blocks_converter(NULL, &maskBlocks);
+    return object;
+}
+
+static struct PyMethodDef _twoBitIO_methods[] = {
+    {"convert",
+     (PyCFunction)TwoBit_convert,
+     METH_VARARGS | METH_KEYWORDS,
+     TwoBit_convert__doc__
+    },
+    {NULL,          NULL, 0, NULL} /* sentinel */
+};
+
+
+static struct PyModuleDef moduledef = {
+    PyModuleDef_HEAD_INIT,
+    "_twoBitIO",
+    "Parser for DNA sequence data in 2bit format",
+    -1,
+    _twoBitIO_methods,
+    NULL,
+    NULL,
+    NULL,
+    NULL
+};
+
+PyMODINIT_FUNC
+PyInit__twoBitIO(void)
+{
+    return PyModule_Create(&moduledef);
+}
diff --git a/code/lib/Bio/SeqIO/_twoBitIO.cp37-win_amd64.pyd b/code/lib/Bio/SeqIO/_twoBitIO.cp37-win_amd64.pyd
new file mode 100644
index 0000000..244dc40
Binary files /dev/null and b/code/lib/Bio/SeqIO/_twoBitIO.cp37-win_amd64.pyd differ
diff --git a/code/lib/Bio/SeqRecord.py b/code/lib/Bio/SeqRecord.py
new file mode 100644
index 0000000..c22b5b5
--- /dev/null
+++ b/code/lib/Bio/SeqRecord.py
@@ -0,0 +1,1372 @@
+# Copyright 2000-2002 Andrew Dalke.  All rights reserved.
+# Copyright 2002-2004 Brad Chapman.  All rights reserved.
+# Copyright 2006-2020 by Peter Cock.  All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Represent a Sequence Record, a sequence with annotation."""
+# NEEDS TO BE SYNCHED WITH THE REST OF BIOPYTHON AND BIOPERL
+# In particular, the SeqRecord and BioSQL.BioSeq.DBSeqRecord classes
+# need to be in sync (this is the BioSQL "Database SeqRecord").
+from io import StringIO
+
+from Bio import StreamModeError
+from Bio.Seq import UndefinedSequenceError
+
+
+_NO_SEQRECORD_COMPARISON = "SeqRecord comparison is deliberately not implemented. Explicitly compare the attributes of interest."
+
+
+class _RestrictedDict(dict):
+    """Dict which only allows sequences of given length as values (PRIVATE).
+
+    This simple subclass of the Python dictionary is used in the SeqRecord
+    object for holding per-letter-annotations.  This class is intended to
+    prevent simple errors by only allowing python sequences (e.g. lists,
+    strings and tuples) to be stored, and only if their length matches that
+    expected (the length of the SeqRecord's seq object).  It cannot however
+    prevent the entries being edited in situ (for example appending entries
+    to a list).
+
+    >>> x = _RestrictedDict(5)
+    >>> x["test"] = "hello"
+    >>> x
+    {'test': 'hello'}
+
+    Adding entries which don't have the expected length are blocked:
+
+    >>> x["test"] = "hello world"
+    Traceback (most recent call last):
+    ...
+    TypeError: We only allow python sequences (lists, tuples or strings) of length 5.
+
+    The expected length is stored as a private attribute,
+
+    >>> x._length
+    5
+
+    In order that the SeqRecord (and other objects using this class) can be
+    pickled, for example for use in the multiprocessing library, we need to
+    be able to pickle the restricted dictionary objects.
+
+    Using the default protocol, which is 3 on Python 3,
+
+    >>> import pickle
+    >>> y = pickle.loads(pickle.dumps(x))
+    >>> y
+    {'test': 'hello'}
+    >>> y._length
+    5
+
+    Using the highest protocol, which is 4 on Python 3,
+
+    >>> import pickle
+    >>> z = pickle.loads(pickle.dumps(x, pickle.HIGHEST_PROTOCOL))
+    >>> z
+    {'test': 'hello'}
+    >>> z._length
+    5
+    """
+
+    def __init__(self, length):
+        """Create an EMPTY restricted dictionary."""
+        dict.__init__(self)
+        self._length = int(length)
+
+    def __setitem__(self, key, value):
+        # The check hasattr(self, "_length") is to cope with pickle protocol 2
+        # I couldn't seem to avoid this with __getstate__ and __setstate__
+        if (
+            not hasattr(value, "__len__")
+            or not hasattr(value, "__getitem__")
+            or (hasattr(self, "_length") and len(value) != self._length)
+        ):
+            raise TypeError(
+                "We only allow python sequences (lists, tuples or strings) "
+                f"of length {self._length}."
+            )
+        dict.__setitem__(self, key, value)
+
+    def update(self, new_dict):
+        # Force this to go via our strict __setitem__ method
+        for (key, value) in new_dict.items():
+            self[key] = value
+
+
+class SeqRecord:
+    """A SeqRecord object holds a sequence and information about it.
+
+    Main attributes:
+     - id          - Identifier such as a locus tag (string)
+     - seq         - The sequence itself (Seq object or similar)
+
+    Additional attributes:
+     - name        - Sequence name, e.g. gene name (string)
+     - description - Additional text (string)
+     - dbxrefs     - List of database cross references (list of strings)
+     - features    - Any (sub)features defined (list of SeqFeature objects)
+     - annotations - Further information about the whole sequence (dictionary).
+       Most entries are strings, or lists of strings.
+     - letter_annotations - Per letter/symbol annotation (restricted
+       dictionary). This holds Python sequences (lists, strings
+       or tuples) whose length matches that of the sequence.
+       A typical use would be to hold a list of integers
+       representing sequencing quality scores, or a string
+       representing the secondary structure.
+
+    You will typically use Bio.SeqIO to read in sequences from files as
+    SeqRecord objects.  However, you may want to create your own SeqRecord
+    objects directly (see the __init__ method for further details):
+
+    >>> from Bio.Seq import Seq
+    >>> from Bio.SeqRecord import SeqRecord
+    >>> record = SeqRecord(Seq("MKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF"),
+    ...                    id="YP_025292.1", name="HokC",
+    ...                    description="toxic membrane protein")
+    >>> print(record)
+    ID: YP_025292.1
+    Name: HokC
+    Description: toxic membrane protein
+    Number of features: 0
+    Seq('MKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF')
+
+    If you want to save SeqRecord objects to a sequence file, use Bio.SeqIO
+    for this.  For the special case where you want the SeqRecord turned into
+    a string in a particular file format there is a format method which uses
+    Bio.SeqIO internally:
+
+    >>> print(record.format("fasta"))
+    >YP_025292.1 toxic membrane protein
+    MKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF
+    <BLANKLINE>
+
+    You can also do things like slicing a SeqRecord, checking its length, etc
+
+    >>> len(record)
+    44
+    >>> edited = record[:10] + record[11:]
+    >>> print(edited.seq)
+    MKQHKAMIVAIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF
+    >>> print(record.seq)
+    MKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF
+
+    """
+
+    def __init__(
+        self,
+        seq,
+        id="",
+        name="",
+        description="",
+        dbxrefs=None,
+        features=None,
+        annotations=None,
+        letter_annotations=None,
+    ):
+        """Create a SeqRecord.
+
+        Arguments:
+         - seq         - Sequence, required (Seq or MutableSeq)
+         - id          - Sequence identifier, recommended (string)
+         - name        - Sequence name, optional (string)
+         - description - Sequence description, optional (string)
+         - dbxrefs     - Database cross references, optional (list of strings)
+         - features    - Any (sub)features, optional (list of SeqFeature objects)
+         - annotations - Dictionary of annotations for the whole sequence
+         - letter_annotations - Dictionary of per-letter-annotations, values
+           should be strings, list or tuples of the same length as the full
+           sequence.
+
+        You will typically use Bio.SeqIO to read in sequences from files as
+        SeqRecord objects.  However, you may want to create your own SeqRecord
+        objects directly.
+
+        Note that while an id is optional, we strongly recommend you supply a
+        unique id string for each record.  This is especially important
+        if you wish to write your sequences to a file.
+
+        You can create a 'blank' SeqRecord object, and then populate the
+        attributes later.
+        """
+        if id is not None and not isinstance(id, str):
+            # Lots of existing code uses id=None... this may be a bad idea.
+            raise TypeError("id argument should be a string")
+        if not isinstance(name, str):
+            raise TypeError("name argument should be a string")
+        if not isinstance(description, str):
+            raise TypeError("description argument should be a string")
+        self._seq = seq
+        self.id = id
+        self.name = name
+        self.description = description
+
+        # database cross references (for the whole sequence)
+        if dbxrefs is None:
+            dbxrefs = []
+        elif not isinstance(dbxrefs, list):
+            raise TypeError("dbxrefs argument should be a list (of strings)")
+        self.dbxrefs = dbxrefs
+
+        # annotations about the whole sequence
+        if annotations is None:
+            annotations = {}
+        elif not isinstance(annotations, dict):
+            raise TypeError("annotations argument should be a dict")
+        self.annotations = annotations
+
+        if letter_annotations is None:
+            # annotations about each letter in the sequence
+            if seq is None:
+                # Should we allow this and use a normal unrestricted dict?
+                self._per_letter_annotations = _RestrictedDict(length=0)
+            else:
+                try:
+                    self._per_letter_annotations = _RestrictedDict(length=len(seq))
+                except TypeError:
+                    raise TypeError(
+                        "seq argument should be a Seq object or similar"
+                    ) from None
+        else:
+            # This will be handled via the property set function, which will
+            # turn this into a _RestrictedDict and thus ensure all the values
+            # in the dict are the right length
+            self.letter_annotations = letter_annotations
+
+        # annotations about parts of the sequence
+        if features is None:
+            features = []
+        elif not isinstance(features, list):
+            raise TypeError(
+                "features argument should be a list (of SeqFeature objects)"
+            )
+        self.features = features
+
+    # TODO - Just make this a read only property?
+    def _set_per_letter_annotations(self, value):
+        if not isinstance(value, dict):
+            raise TypeError(
+                "The per-letter-annotations should be a (restricted) dictionary."
+            )
+        # Turn this into a restricted-dictionary (and check the entries)
+        try:
+            self._per_letter_annotations = _RestrictedDict(length=len(self.seq))
+        except AttributeError:
+            # e.g. seq is None
+            self._per_letter_annotations = _RestrictedDict(length=0)
+        self._per_letter_annotations.update(value)
+
+    letter_annotations = property(
+        fget=lambda self: self._per_letter_annotations,
+        fset=_set_per_letter_annotations,
+        doc="""Dictionary of per-letter-annotation for the sequence.
+
+        For example, this can hold quality scores used in FASTQ or QUAL files.
+        Consider this example using Bio.SeqIO to read in an example Solexa
+        variant FASTQ file as a SeqRecord:
+
+        >>> from Bio import SeqIO
+        >>> record = SeqIO.read("Quality/solexa_faked.fastq", "fastq-solexa")
+        >>> print("%s %s" % (record.id, record.seq))
+        slxa_0001_1_0001_01 ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTNNNNNN
+        >>> print(list(record.letter_annotations))
+        ['solexa_quality']
+        >>> print(record.letter_annotations["solexa_quality"])
+        [40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, -1, -2, -3, -4, -5]
+
+        The letter_annotations get sliced automatically if you slice the
+        parent SeqRecord, for example taking the last ten bases:
+
+        >>> sub_record = record[-10:]
+        >>> print("%s %s" % (sub_record.id, sub_record.seq))
+        slxa_0001_1_0001_01 ACGTNNNNNN
+        >>> print(sub_record.letter_annotations["solexa_quality"])
+        [4, 3, 2, 1, 0, -1, -2, -3, -4, -5]
+
+        Any python sequence (i.e. list, tuple or string) can be recorded in
+        the SeqRecord's letter_annotations dictionary as long as the length
+        matches that of the SeqRecord's sequence.  e.g.
+
+        >>> len(sub_record.letter_annotations)
+        1
+        >>> sub_record.letter_annotations["dummy"] = "abcdefghij"
+        >>> len(sub_record.letter_annotations)
+        2
+
+        You can delete entries from the letter_annotations dictionary as usual:
+
+        >>> del sub_record.letter_annotations["solexa_quality"]
+        >>> sub_record.letter_annotations
+        {'dummy': 'abcdefghij'}
+
+        You can completely clear the dictionary easily as follows:
+
+        >>> sub_record.letter_annotations = {}
+        >>> sub_record.letter_annotations
+        {}
+
+        Note that if replacing the record's sequence with a sequence of a
+        different length you must first clear the letter_annotations dict.
+        """,
+    )
+
+    def _set_seq(self, value):
+        # TODO - Add a deprecation warning that the seq should be write only?
+        if self._per_letter_annotations:
+            if len(self) != len(value):
+                # TODO - Make this a warning? Silently empty the dictionary?
+                raise ValueError("You must empty the letter annotations first!")
+            else:
+                # Leave the existing per letter annotations unchanged:
+                self._seq = value
+        else:
+            self._seq = value
+            # Reset the (empty) letter annotations dict with new length:
+            try:
+                self._per_letter_annotations = _RestrictedDict(length=len(self.seq))
+            except AttributeError:
+                # e.g. seq is None
+                self._per_letter_annotations = _RestrictedDict(length=0)
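+
+    # A small sketch of the rule enforced above: replacing the sequence with
+    # one of a different length only works once letter_annotations is empty,
+    # e.g.
+    #
+    #     record.letter_annotations = {}
+    #     record.seq = Seq("ACGTACGT")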
+
+    seq = property(
+        fget=lambda self: self._seq,
+        fset=_set_seq,
+        doc="The sequence itself, as a Seq or MutableSeq object.",
+    )
+
+    def __getitem__(self, index):
+        """Return a sub-sequence or an individual letter.
+
+        Slicing, e.g. my_record[5:10], returns a new SeqRecord for
+        that sub-sequence with some annotation preserved as follows:
+
+        * The name, id and description are kept as-is.
+        * Any per-letter-annotations are sliced to match the requested
+          sub-sequence.
+        * Unless a stride is used, all those features which fall fully
+          within the subsequence are included (with their locations
+          adjusted accordingly). If you want to preserve any truncated
+          features (e.g. GenBank/EMBL source features), you must
+          explicitly add them to the new SeqRecord yourself.
+        * With the exception of any molecule type, the annotations
+          dictionary and the dbxrefs list are not used for the new
+          SeqRecord, as in general they may not apply to the
+          subsequence. If you want to preserve them, you must explicitly
+          copy them to the new SeqRecord yourself.
+
+        Using an integer index, e.g. my_record[5] is shorthand for
+        extracting that letter from the sequence, my_record.seq[5].
+
+        For example, consider this short protein and its secondary
+        structure as encoded by the PDB (e.g. H for alpha helices),
+        plus a simple feature for its histidine self phosphorylation
+        site:
+
+        >>> from Bio.Seq import Seq
+        >>> from Bio.SeqRecord import SeqRecord
+        >>> from Bio.SeqFeature import SeqFeature, FeatureLocation
+        >>> rec = SeqRecord(Seq("MAAGVKQLADDRTLLMAGVSHDLRTPLTRIRLAT"
+        ...                     "EMMSEQDGYLAESINKDIEECNAIIEQFIDYLR"),
+        ...                 id="1JOY", name="EnvZ",
+        ...                 description="Homodimeric domain of EnvZ from E. coli")
+        >>> rec.letter_annotations["secondary_structure"] = "  S  SSSSSSHHHHHTTTHHHHHHHHHHHHHHHHHHHHHHTHHHHHHHHHHHHHHHHHHHHHTT  "
+        >>> rec.features.append(SeqFeature(FeatureLocation(20, 21),
+        ...                     type = "Site"))
+
+        Now let's have a quick look at the full record,
+
+        >>> print(rec)
+        ID: 1JOY
+        Name: EnvZ
+        Description: Homodimeric domain of EnvZ from E. coli
+        Number of features: 1
+        Per letter annotation for: secondary_structure
+        Seq('MAAGVKQLADDRTLLMAGVSHDLRTPLTRIRLATEMMSEQDGYLAESINKDIEE...YLR')
+        >>> rec.letter_annotations["secondary_structure"]
+        '  S  SSSSSSHHHHHTTTHHHHHHHHHHHHHHHHHHHHHHTHHHHHHHHHHHHHHHHHHHHHTT  '
+        >>> print(rec.features[0].location)
+        [20:21]
+
+        Now let's take a sub-sequence, here chosen as the first (fractured)
+        alpha helix which includes the histidine phosphorylation site:
+
+        >>> sub = rec[11:41]
+        >>> print(sub)
+        ID: 1JOY
+        Name: EnvZ
+        Description: Homodimeric domain of EnvZ from E. coli
+        Number of features: 1
+        Per letter annotation for: secondary_structure
+        Seq('RTLLMAGVSHDLRTPLTRIRLATEMMSEQD')
+        >>> sub.letter_annotations["secondary_structure"]
+        'HHHHHTTTHHHHHHHHHHHHHHHHHHHHHH'
+        >>> print(sub.features[0].location)
+        [9:10]
+
+        You can also of course omit the start or end values, for
+        example to get the first ten letters only:
+
+        >>> print(rec[:10])
+        ID: 1JOY
+        Name: EnvZ
+        Description: Homodimeric domain of EnvZ from E. coli
+        Number of features: 0
+        Per letter annotation for: secondary_structure
+        Seq('MAAGVKQLAD')
+
+        Or for the last ten letters:
+
+        >>> print(rec[-10:])
+        ID: 1JOY
+        Name: EnvZ
+        Description: Homodimeric domain of EnvZ from E. coli
+        Number of features: 0
+        Per letter annotation for: secondary_structure
+        Seq('IIEQFIDYLR')
+
+        If you omit both, then you get a copy of the original record (although
+        lacking the annotations and dbxrefs):
+
+        >>> print(rec[:])
+        ID: 1JOY
+        Name: EnvZ
+        Description: Homodimeric domain of EnvZ from E. coli
+        Number of features: 1
+        Per letter annotation for: secondary_structure
+        Seq('MAAGVKQLADDRTLLMAGVSHDLRTPLTRIRLATEMMSEQDGYLAESINKDIEE...YLR')
+
+        Finally, indexing with a simple integer is shorthand for pulling out
+        that letter from the sequence directly:
+
+        >>> rec[5]
+        'K'
+        >>> rec.seq[5]
+        'K'
+        """
+        if isinstance(index, int):
+            # NOTE - Sequence-level annotations such as the id, name, etc.
+            # do not really apply to a single character.  However, should
+            # we try to expose any per-letter-annotation here?  If so, how?
+            return self.seq[index]
+        elif isinstance(index, slice):
+            if self.seq is None:
+                raise ValueError("If the sequence is None, we cannot slice it.")
+            parent_length = len(self)
+            try:
+                from BioSQL.BioSeq import DBSeqRecord
+
+                biosql_available = True
+            except ImportError:
+                biosql_available = False
+
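+            # For a database-backed DBSeqRecord we return a plain in-memory
+            # SeqRecord built from the slice, since the DBSeqRecord subclass
+            # is not designed to be re-instantiated this way (editor's note;
+            # the general case below uses self.__class__ instead).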
+            if biosql_available and isinstance(self, DBSeqRecord):
+                answer = SeqRecord(
+                    self.seq[index],
+                    id=self.id,
+                    name=self.name,
+                    description=self.description,
+                )
+            else:
+                answer = self.__class__(
+                    self.seq[index],
+                    id=self.id,
+                    name=self.name,
+                    description=self.description,
+                )
+            # TODO - The description may no longer apply.
+            # It would be safer to change it to something
+            # generic like "edited" or the default value.
+
+            # Don't copy the annotations dict and dbxrefs list,
+            # they may not apply to a subsequence.
+            # answer.annotations = dict(self.annotations.items())
+            # answer.dbxrefs = self.dbxrefs[:]
+            # TODO - Review this in light of adding SeqRecord objects?
+
+            if "molecule_type" in self.annotations:
+                # This will still apply, and we need it for GenBank/EMBL etc output
+                answer.annotations["molecule_type"] = self.annotations["molecule_type"]
+
+            # TODO - Cope with strides by generating ambiguous locations?
+            start, stop, step = index.indices(parent_length)
+            if step == 1:
+                # Select relevant features, add them with shifted locations
+                # assert str(self.seq)[index] == str(self.seq)[start:stop]
+                for f in self.features:
+                    if f.ref or f.ref_db:
+                        # TODO - Implement this (with lots of tests)?
+                        import warnings
+
+                        warnings.warn(
+                            "When slicing SeqRecord objects, any "
+                            "SeqFeature referencing other sequences (e.g. "
+                            "from segmented GenBank records) are ignored."
+                        )
+                        continue
+                    if (
+                        start <= f.location.nofuzzy_start
+                        and f.location.nofuzzy_end <= stop
+                    ):
+                        answer.features.append(f._shift(-start))
+
+            # Slice all the values to match the sliced sequence
+            # (this should also work with strides, even negative strides):
+            for key, value in self.letter_annotations.items():
+                answer._per_letter_annotations[key] = value[index]
+
+            return answer
+        raise ValueError("Invalid index")
+
+    def __iter__(self):
+        """Iterate over the letters in the sequence.
+
+        For example, using Bio.SeqIO to read in a protein FASTA file:
+
+        >>> from Bio import SeqIO
+        >>> record = SeqIO.read("Fasta/loveliesbleeding.pro", "fasta")
+        >>> for amino in record:
+        ...     print(amino)
+        ...     if amino == "L": break
+        X
+        A
+        G
+        L
+        >>> print(record.seq[3])
+        L
+
+        This is just a shortcut for iterating over the sequence directly:
+
+        >>> for amino in record.seq:
+        ...     print(amino)
+        ...     if amino == "L": break
+        X
+        A
+        G
+        L
+        >>> print(record.seq[3])
+        L
+
+        Note that this does not facilitate iteration together with any
+        per-letter-annotation.  However, you can achieve that using the
+        Python zip function on the record (or its sequence) and the relevant
+        per-letter-annotation:
+
+        >>> from Bio import SeqIO
+        >>> rec = SeqIO.read("Quality/solexa_faked.fastq", "fastq-solexa")
+        >>> print("%s %s" % (rec.id, rec.seq))
+        slxa_0001_1_0001_01 ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTNNNNNN
+        >>> print(list(rec.letter_annotations))
+        ['solexa_quality']
+        >>> for nuc, qual in zip(rec, rec.letter_annotations["solexa_quality"]):
+        ...     if qual > 35:
+        ...         print("%s %i" % (nuc, qual))
+        A 40
+        C 39
+        G 38
+        T 37
+        A 36
+
+        You may agree that using zip(rec.seq, ...) is more explicit than using
+        zip(rec, ...) as shown above.
+        """
+        return iter(self.seq)
+
+    def __contains__(self, char):
+        """Implement the 'in' keyword, searches the sequence.
+
+        e.g.
+
+        >>> from Bio import SeqIO
+        >>> record = SeqIO.read("Fasta/sweetpea.nu", "fasta")
+        >>> "GAATTC" in record
+        False
+        >>> "AAA" in record
+        True
+
+        This essentially acts as a proxy for using "in" on the sequence:
+
+        >>> "GAATTC" in record.seq
+        False
+        >>> "AAA" in record.seq
+        True
+
+        Note that you can also use Seq objects as the query,
+
+        >>> from Bio.Seq import Seq
+        >>> Seq("AAA") in record
+        True
+
+        See also the Seq object's __contains__ method.
+        """
+        return char in self.seq
+
+    def __str__(self):
+        """Return a human readable summary of the record and its annotation (string).
+
+        The Python built-in function str works by calling the object's __str__
+        method.  e.g.
+
+        >>> from Bio.Seq import Seq
+        >>> from Bio.SeqRecord import SeqRecord
+        >>> record = SeqRecord(Seq("MKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF"),
+        ...                    id="YP_025292.1", name="HokC",
+        ...                    description="toxic membrane protein, small")
+        >>> print(str(record))
+        ID: YP_025292.1
+        Name: HokC
+        Description: toxic membrane protein, small
+        Number of features: 0
+        Seq('MKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF')
+
+        In this example you don't actually need to call str explicitly, as the
+        print function does this automatically:
+
+        >>> print(record)
+        ID: YP_025292.1
+        Name: HokC
+        Description: toxic membrane protein, small
+        Number of features: 0
+        Seq('MKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF')
+
+        Note that long sequences are shown truncated.
+        """
+        lines = []
+        if self.id:
+            lines.append(f"ID: {self.id}")
+        if self.name:
+            lines.append(f"Name: {self.name}")
+        if self.description:
+            lines.append(f"Description: {self.description}")
+        if self.dbxrefs:
+            lines.append("Database cross-references: " + ", ".join(self.dbxrefs))
+        lines.append(f"Number of features: {len(self.features)}")
+        for a in self.annotations:
+            lines.append(f"/{a}={str(self.annotations[a])}")
+        if self.letter_annotations:
+            lines.append(
+                "Per letter annotation for: " + ", ".join(self.letter_annotations)
+            )
+        try:
+            bytes(self.seq)
+        except UndefinedSequenceError:
+            lines.append(f"Undefined sequence of length {len(self.seq)}")
+        else:
+            # Don't want to include the entire sequence
+            seq = repr(self.seq)
+            lines.append(seq)
+        return "\n".join(lines)
+
+    def __repr__(self):
+        """Return a concise summary of the record for debugging (string).
+
+        The Python built-in function repr works by calling the object's __repr__
+        method.  e.g.
+
+        >>> from Bio.Seq import Seq
+        >>> from Bio.SeqRecord import SeqRecord
+        >>> rec = SeqRecord(Seq("MASRGVNKVILVGNLGQDPEVRYMPNGGAVANITLATSESWRDKAT"
+        ...                     "GEMKEQTEWHRVVLFGKLAEVASEYLRKGSQVYIEGQLRTRKWTDQ"
+        ...                     "SGQDRYTTEVVVNVGGTMQMLGGRQGGGAPAGGNIGGGQPQGGWGQ"
+        ...                     "PQQPQGGNQFSGGAQSRPQQSAPAAPSNEPPMDFDDDIPF"),
+        ...                 id="NP_418483.1", name="b4059",
+        ...                 description="ssDNA-binding protein",
+        ...                 dbxrefs=["ASAP:13298", "GI:16131885", "GeneID:948570"])
+        >>> print(repr(rec))
+        SeqRecord(seq=Seq('MASRGVNKVILVGNLGQDPEVRYMPNGGAVANITLATSESWRDKATGEMKEQTE...IPF'), id='NP_418483.1', name='b4059', description='ssDNA-binding protein', dbxrefs=['ASAP:13298', 'GI:16131885', 'GeneID:948570'])
+
+        At the python prompt you can also use this shorthand:
+
+        >>> rec
+        SeqRecord(seq=Seq('MASRGVNKVILVGNLGQDPEVRYMPNGGAVANITLATSESWRDKATGEMKEQTE...IPF'), id='NP_418483.1', name='b4059', description='ssDNA-binding protein', dbxrefs=['ASAP:13298', 'GI:16131885', 'GeneID:948570'])
+
+        Note that long sequences are shown truncated. Also note that any
+        annotations, letter_annotations and features are not shown (as they
+        would lead to a very long string).
+        """
+        return (
+            f"{self.__class__.__name__}(seq={self.seq!r}, id={self.id!r},"
+            f" name={self.name!r}, description={self.description!r},"
+            f" dbxrefs={self.dbxrefs!r})"
+        )
+
+    def format(self, format):
+        r"""Return the record as a string in the specified file format.
+
+        The format should be a lower case string supported as an output
+        format by Bio.SeqIO, which is used to turn the SeqRecord into a
+        string.  e.g.
+
+        >>> from Bio.Seq import Seq
+        >>> from Bio.SeqRecord import SeqRecord
+        >>> record = SeqRecord(Seq("MKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF"),
+        ...                    id="YP_025292.1", name="HokC",
+        ...                    description="toxic membrane protein")
+        >>> record.format("fasta")
+        '>YP_025292.1 toxic membrane protein\nMKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF\n'
+        >>> print(record.format("fasta"))
+        >YP_025292.1 toxic membrane protein
+        MKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF
+        <BLANKLINE>
+
+        The Python print function automatically appends a new line, meaning
+        in this example a blank line is shown.  If you look at the string
+        representation you can see there is a trailing new line (shown as
+        slash n) which is important when writing to a file or if
+        concatenating multiple sequence strings together.
+
+        Note that this method will NOT work on every possible file format
+        supported by Bio.SeqIO (e.g. some are for multiple sequences only,
+        and binary formats are not supported).
+        """
+        # See also the __format__ method
+        # See also the Bio.Align.Generic.Alignment class and its format()
+        return self.__format__(format)
+
+    def __format__(self, format_spec):
+        r"""Return the record as a string in the specified file format.
+
+        This method supports the Python format() function and f-strings.
+        The format_spec should be a lower case string supported by
+        Bio.SeqIO as a text output file format. Requesting a binary file
+        format raises a ValueError. e.g.
+
+        >>> from Bio.Seq import Seq
+        >>> from Bio.SeqRecord import SeqRecord
+        >>> record = SeqRecord(Seq("MKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF"),
+        ...                    id="YP_025292.1", name="HokC",
+        ...                    description="toxic membrane protein")
+        ...
+        >>> format(record, "fasta")
+        '>YP_025292.1 toxic membrane protein\nMKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF\n'
+        >>> print(f"Here is {record.id} in FASTA format:\n{record:fasta}")
+        Here is YP_025292.1 in FASTA format:
+        >YP_025292.1 toxic membrane protein
+        MKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF
+        <BLANKLINE>
+
+        See also the SeqRecord's format() method.
+        """
+        if not format_spec:
+            # Follow python convention and default to using __str__
+            return str(self)
+        from Bio import SeqIO
+
+        # Easy case, can call string-building function directly
+        if format_spec in SeqIO._FormatToString:
+            return SeqIO._FormatToString[format_spec](self)
+
+        # Harder case, make a temp handle instead
+        handle = StringIO()
+        try:
+            SeqIO.write(self, handle, format_spec)
+        except StreamModeError:
+            raise ValueError(
+                "Binary format %s cannot be used with SeqRecord format method"
+                % format_spec
+            ) from None
+        return handle.getvalue()
+
+    def __len__(self):
+        """Return the length of the sequence.
+
+        For example, using Bio.SeqIO to read in a FASTA nucleotide file:
+
+        >>> from Bio import SeqIO
+        >>> record = SeqIO.read("Fasta/sweetpea.nu", "fasta")
+        >>> len(record)
+        309
+        >>> len(record.seq)
+        309
+        """
+        return len(self.seq)
+
+    def __lt__(self, other):
+        """Define the less-than operator (not implemented)."""
+        raise NotImplementedError(_NO_SEQRECORD_COMPARISON)
+
+    def __le__(self, other):
+        """Define the less-than-or-equal-to operator (not implemented)."""
+        raise NotImplementedError(_NO_SEQRECORD_COMPARISON)
+
+    def __eq__(self, other):
+        """Define the equal-to operator (not implemented)."""
+        raise NotImplementedError(_NO_SEQRECORD_COMPARISON)
+
+    def __ne__(self, other):
+        """Define the not-equal-to operator (not implemented)."""
+        raise NotImplementedError(_NO_SEQRECORD_COMPARISON)
+
+    def __gt__(self, other):
+        """Define the greater-than operator (not implemented)."""
+        raise NotImplementedError(_NO_SEQRECORD_COMPARISON)
+
+    def __ge__(self, other):
+        """Define the greater-than-or-equal-to operator (not implemented)."""
+        raise NotImplementedError(_NO_SEQRECORD_COMPARISON)
+
+    def __bool__(self):
+        """Boolean value of an instance of this class (True).
+
+        This behaviour is for backwards compatibility, since until the
+        __len__ method was added, a SeqRecord always evaluated as True.
+
+        Note that in comparison, a Seq object will evaluate to False if it
+        has a zero length sequence.
+
+        WARNING: The SeqRecord may in future evaluate to False when its
+        sequence is of zero length (in order to better match the Seq
+        object behaviour)!
+        """
+        return True
+
+    def __add__(self, other):
+        """Add another sequence or string to this sequence.
+
+        The other sequence can be a SeqRecord object, a Seq object (or
+        similar, e.g. a MutableSeq) or a plain Python string. If you add
+        a plain string or a Seq-like object, the new SeqRecord will simply
+        have this appended to the existing data. However, any per-letter
+        annotation will be lost:
+
+        >>> from Bio import SeqIO
+        >>> record = SeqIO.read("Quality/solexa_faked.fastq", "fastq-solexa")
+        >>> print("%s %s" % (record.id, record.seq))
+        slxa_0001_1_0001_01 ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTNNNNNN
+        >>> print(list(record.letter_annotations))
+        ['solexa_quality']
+
+        >>> new = record + "ACT"
+        >>> print("%s %s" % (new.id, new.seq))
+        slxa_0001_1_0001_01 ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTNNNNNNACT
+        >>> print(list(new.letter_annotations))
+        []
+
+        The new record will attempt to combine the annotation, but for any
+        ambiguities (e.g. different names) it defaults to omitting that
+        annotation.
+
+        >>> from Bio import SeqIO
+        >>> with open("GenBank/pBAD30.gb") as handle:
+        ...     plasmid = SeqIO.read(handle, "gb")
+        >>> print("%s %i" % (plasmid.id, len(plasmid)))
+        pBAD30 4923
+
+        Now let's cut the plasmid into two pieces, and join them back up the
+        other way round (i.e. shift the starting point on this plasmid; have
+        a look at the annotated features in the original file to see why this
+        particular split point might make sense):
+
+        >>> left = plasmid[:3765]
+        >>> right = plasmid[3765:]
+        >>> new = right + left
+        >>> print("%s %i" % (new.id, len(new)))
+        pBAD30 4923
+        >>> str(new.seq) == str(right.seq + left.seq)
+        True
+        >>> len(new.features) == len(left.features) + len(right.features)
+        True
+
+        When we add the left and right SeqRecord objects, their annotation
+        is all consistent, so it is all conserved in the new SeqRecord:
+
+        >>> new.id == left.id == right.id == plasmid.id
+        True
+        >>> new.name == left.name == right.name == plasmid.name
+        True
+        >>> new.description == plasmid.description
+        True
+        >>> new.annotations == left.annotations == right.annotations
+        True
+        >>> new.letter_annotations == plasmid.letter_annotations
+        True
+        >>> new.dbxrefs == left.dbxrefs == right.dbxrefs
+        True
+
+        However, we should point out that when we sliced the SeqRecord,
+        any annotations dictionary or dbxrefs list entries were lost.
+        You can explicitly copy them like this:
+
+        >>> new.annotations = plasmid.annotations.copy()
+        >>> new.dbxrefs = plasmid.dbxrefs[:]
+        """
+        if not isinstance(other, SeqRecord):
+            # Assume it is a string or a Seq.
+            # Note can't transfer any per-letter-annotations
+            return SeqRecord(
+                self.seq + other,
+                id=self.id,
+                name=self.name,
+                description=self.description,
+                features=self.features[:],
+                annotations=self.annotations.copy(),
+                dbxrefs=self.dbxrefs[:],
+            )
+        # Adding two SeqRecord objects... must merge annotation.
+        answer = SeqRecord(
+            self.seq + other.seq, features=self.features[:], dbxrefs=self.dbxrefs[:]
+        )
+        # Will take all the features and all the db cross refs,
+        length = len(self)
+        for f in other.features:
+            answer.features.append(f._shift(length))
+        del length
+        for ref in other.dbxrefs:
+            if ref not in answer.dbxrefs:
+                answer.dbxrefs.append(ref)
+        # Take common id/name/description/annotation
+        if self.id == other.id:
+            answer.id = self.id
+        if self.name == other.name:
+            answer.name = self.name
+        if self.description == other.description:
+            answer.description = self.description
+        for k, v in self.annotations.items():
+            if k in other.annotations and other.annotations[k] == v:
+                answer.annotations[k] = v
+        # Can append matching per-letter-annotation
+        for k, v in self.letter_annotations.items():
+            if k in other.letter_annotations:
+                answer.letter_annotations[k] = v + other.letter_annotations[k]
+        return answer
+
+    def __radd__(self, other):
+        """Add another sequence or string to this sequence (from the left).
+
+        This method handles adding a Seq object (or similar, e.g. MutableSeq)
+        or a plain Python string (on the left) to a SeqRecord (on the right).
+        See the __add__ method for more details, but for example:
+
+        >>> from Bio import SeqIO
+        >>> record = SeqIO.read("Quality/solexa_faked.fastq", "fastq-solexa")
+        >>> print("%s %s" % (record.id, record.seq))
+        slxa_0001_1_0001_01 ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTNNNNNN
+        >>> print(list(record.letter_annotations))
+        ['solexa_quality']
+
+        >>> new = "ACT" + record
+        >>> print("%s %s" % (new.id, new.seq))
+        slxa_0001_1_0001_01 ACTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTNNNNNN
+        >>> print(list(new.letter_annotations))
+        []
+        """
+        if isinstance(other, SeqRecord):
+            raise RuntimeError(
+                "This should have happened via the __add__ of "
+                "the other SeqRecord being added!"
+            )
+        # Assume it is a string or a Seq.
+        # Note can't transfer any per-letter-annotations
+        offset = len(other)
+        return SeqRecord(
+            other + self.seq,
+            id=self.id,
+            name=self.name,
+            description=self.description,
+            features=[f._shift(offset) for f in self.features],
+            annotations=self.annotations.copy(),
+            dbxrefs=self.dbxrefs[:],
+        )
+
+    def upper(self):
+        """Return a copy of the record with an upper case sequence.
+
+        All the annotation is preserved unchanged. e.g.
+
+        >>> from Bio.Seq import Seq
+        >>> from Bio.SeqRecord import SeqRecord
+        >>> record = SeqRecord(Seq("acgtACGT"), id="Test",
+        ...                    description = "Made up for this example")
+        >>> record.letter_annotations["phred_quality"] = [1, 2, 3, 4, 5, 6, 7, 8]
+        >>> print(record.upper().format("fastq"))
+        @Test Made up for this example
+        ACGTACGT
+        +
+        "#$%&'()
+        <BLANKLINE>
+
+        Naturally, there is a matching lower method:
+
+        >>> print(record.lower().format("fastq"))
+        @Test Made up for this example
+        acgtacgt
+        +
+        "#$%&'()
+        <BLANKLINE>
+        """
+        return SeqRecord(
+            self.seq.upper(),
+            id=self.id,
+            name=self.name,
+            description=self.description,
+            dbxrefs=self.dbxrefs[:],
+            features=self.features[:],
+            annotations=self.annotations.copy(),
+            letter_annotations=self.letter_annotations.copy(),
+        )
+
+    def lower(self):
+        """Return a copy of the record with a lower case sequence.
+
+        All the annotation is preserved unchanged. e.g.
+
+        >>> from Bio import SeqIO
+        >>> record = SeqIO.read("Fasta/aster.pro", "fasta")
+        >>> print(record.format("fasta"))
+        >gi|3298468|dbj|BAA31520.1| SAMIPF
+        GGHVNPAVTFGAFVGGNITLLRGIVYIIAQLLGSTVACLLLKFVTNDMAVGVFSLSAGVG
+        VTNALVFEIVMTFGLVYTVYATAIDPKKGSLGTIAPIAIGFIVGANI
+        <BLANKLINE>
+        >>> print(record.lower().format("fasta"))
+        >gi|3298468|dbj|BAA31520.1| SAMIPF
+        gghvnpavtfgafvggnitllrgivyiiaqllgstvaclllkfvtndmavgvfslsagvg
+        vtnalvfeivmtfglvytvyataidpkkgslgtiapiaigfivgani
+        <BLANKLINE>
+
+        To take a more annotation-rich example,
+
+        >>> from Bio import SeqIO
+        >>> old = SeqIO.read("EMBL/TRBG361.embl", "embl")
+        >>> len(old.features)
+        3
+        >>> new = old.lower()
+        >>> len(old.features) == len(new.features)
+        True
+        >>> old.annotations["organism"] == new.annotations["organism"]
+        True
+        >>> old.dbxrefs == new.dbxrefs
+        True
+        """
+        return SeqRecord(
+            self.seq.lower(),
+            id=self.id,
+            name=self.name,
+            description=self.description,
+            dbxrefs=self.dbxrefs[:],
+            features=self.features[:],
+            annotations=self.annotations.copy(),
+            letter_annotations=self.letter_annotations.copy(),
+        )
+
+    def reverse_complement(
+        self,
+        id=False,
+        name=False,
+        description=False,
+        features=True,
+        annotations=False,
+        letter_annotations=True,
+        dbxrefs=False,
+    ):
+        """Return new SeqRecord with reverse complement sequence.
+
+        By default the new record does NOT preserve the sequence identifier,
+        name, description, general annotation or database cross-references -
+        these are unlikely to apply to the reversed sequence.
+
+        You can specify the returned record's id, name and description as
+        strings, or True to keep that of the parent, or False for a default.
+
+        You can specify the returned record's features with a list of
+        SeqFeature objects, or True to keep that of the parent, or False to
+        omit them. The default is to keep the original features (with the
+        strand and locations adjusted).
+
+        You can also specify both the returned record's annotations and
+        letter_annotations as dictionaries, True to keep that of the parent,
+        or False to omit them. The default is to keep the original
+        annotations (with the letter annotations reversed).
+
+        To show what happens to the per-letter annotations, consider an
+        example Solexa variant FASTQ file with a single entry, which we'll
+        read in as a SeqRecord:
+
+        >>> from Bio import SeqIO
+        >>> record = SeqIO.read("Quality/solexa_faked.fastq", "fastq-solexa")
+        >>> print("%s %s" % (record.id, record.seq))
+        slxa_0001_1_0001_01 ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTNNNNNN
+        >>> print(list(record.letter_annotations))
+        ['solexa_quality']
+        >>> print(record.letter_annotations["solexa_quality"])
+        [40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, -1, -2, -3, -4, -5]
+
+        Now take the reverse complement; here we explicitly give a new
+        identifier (the old identifier with a suffix):
+
+        >>> rc_record = record.reverse_complement(id=record.id + "_rc")
+        >>> print("%s %s" % (rc_record.id, rc_record.seq))
+        slxa_0001_1_0001_01_rc NNNNNNACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+
+        Notice that the per-letter-annotations have also been reversed,
+        although this may not be appropriate for all cases.
+
+        >>> print(rc_record.letter_annotations["solexa_quality"])
+        [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40]
+
+        Now for the features, we need a different example. Parsing a GenBank
+        file is probably the easiest way to get a nice example with features
+        in it...
+
+        >>> from Bio import SeqIO
+        >>> with open("GenBank/pBAD30.gb") as handle:
+        ...     plasmid = SeqIO.read(handle, "gb")
+        >>> print("%s %i" % (plasmid.id, len(plasmid)))
+        pBAD30 4923
+        >>> plasmid.seq
+        Seq('GCTAGCGGAGTGTATACTGGCTTACTATGTTGGCACTGATGAGGGTGTCAGTGA...ATG')
+        >>> len(plasmid.features)
+        13
+
+        Now, let's take the reverse complement of this whole plasmid:
+
+        >>> rc_plasmid = plasmid.reverse_complement(id=plasmid.id+"_rc")
+        >>> print("%s %i" % (rc_plasmid.id, len(rc_plasmid)))
+        pBAD30_rc 4923
+        >>> rc_plasmid.seq
+        Seq('CATGGGCAAATATTATACGCAAGGCGACAAGGTGCTGATGCCGCTGGCGATTCA...AGC')
+        >>> len(rc_plasmid.features)
+        13
+
+        Let's compare the first CDS feature - it has gone from being the
+        second feature (index 1) to the second last feature (index -2), its
+        strand has changed, and the location switched round.
+
+        >>> print(plasmid.features[1])
+        type: CDS
+        location: [1081:1960](-)
+        qualifiers:
+            Key: label, Value: ['araC']
+            Key: note, Value: ['araC regulator of the arabinose BAD promoter']
+            Key: vntifkey, Value: ['4']
+        <BLANKLINE>
+        >>> print(rc_plasmid.features[-2])
+        type: CDS
+        location: [2963:3842](+)
+        qualifiers:
+            Key: label, Value: ['araC']
+            Key: note, Value: ['araC regulator of the arabinose BAD promoter']
+            Key: vntifkey, Value: ['4']
+        <BLANKLINE>
+
+        You can check this new location, based on the length of the plasmid:
+
+        >>> len(plasmid) - 1081
+        3842
+        >>> len(plasmid) - 1960
+        2963
+
+        Note that if the SeqFeature annotation includes any strand specific
+        information (e.g. base changes for a SNP), this information is not
+        amended, and would need correction after the reverse complement.
+
+        Note that trying to reverse complement a protein SeqRecord raises an
+        exception:
+
+        >>> from Bio.Seq import Seq
+        >>> from Bio.SeqRecord import SeqRecord
+        >>> protein_rec = SeqRecord(Seq("MAIVMGR"), id="Test",
+        ...                         annotations={"molecule_type": "protein"})
+        >>> protein_rec.reverse_complement()
+        Traceback (most recent call last):
+           ...
+        ValueError: Proteins do not have complements!
+
+        If you have RNA without any U bases, it must be annotated as RNA;
+        otherwise it will be treated as DNA by default, with A mapped to T:
+
+        >>> from Bio.Seq import Seq
+        >>> from Bio.SeqRecord import SeqRecord
+        >>> rna1 = SeqRecord(Seq("ACG"), id="Test")
+        >>> rna2 = SeqRecord(Seq("ACG"), id="Test", annotations={"molecule_type": "RNA"})
+        >>> print(rna1.reverse_complement(id="RC", description="unk").format("fasta"))
+        >RC unk
+        CGT
+        <BLANKLINE>
+        >>> print(rna2.reverse_complement(id="RC", description="RNA").format("fasta"))
+        >RC RNA
+        CGU
+        <BLANKLINE>
+
+        Also note you can reverse complement a SeqRecord using a MutableSeq:
+
+        >>> from Bio.Seq import MutableSeq
+        >>> from Bio.SeqRecord import SeqRecord
+        >>> rec = SeqRecord(MutableSeq("ACGT"), id="Test")
+        >>> rec.seq[0] = "T"
+        >>> print("%s %s" % (rec.id, rec.seq))
+        Test TCGT
+        >>> rc = rec.reverse_complement(id=True)
+        >>> print("%s %s" % (rc.id, rc.seq))
+        Test ACGA
+        """
+        from Bio.Seq import Seq, MutableSeq  # Lazy to avoid circular imports
+
+        if "protein" in self.annotations.get("molecule_type", ""):
+            raise ValueError("Proteins do not have complements!")
+        if "RNA" in self.annotations.get("molecule_type", ""):
+            if isinstance(self.seq, MutableSeq):
+                # Does not currently have reverse_complement_rna method:
+                answer = SeqRecord(Seq(self.seq).reverse_complement_rna())
+            else:
+                answer = SeqRecord(self.seq.reverse_complement_rna())
+        else:
+            # Default to DNA
+            if isinstance(self.seq, MutableSeq):
+                # Currently the MutableSeq reverse complement is in situ
+                answer = SeqRecord(Seq(self.seq).reverse_complement())
+            else:
+                answer = SeqRecord(self.seq.reverse_complement())
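+        # The annotation arguments below all follow the convention described
+        # in the docstring: an explicit value (string, list or dict) is used
+        # as-is, True copies the parent's value, and False leaves the default.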
+        if isinstance(id, str):
+            answer.id = id
+        elif id:
+            answer.id = self.id
+        if isinstance(name, str):
+            answer.name = name
+        elif name:
+            answer.name = self.name
+        if isinstance(description, str):
+            answer.description = description
+        elif description:
+            answer.description = self.description
+        if isinstance(dbxrefs, list):
+            answer.dbxrefs = dbxrefs
+        elif dbxrefs:
+            # Copy the old dbxrefs
+            answer.dbxrefs = self.dbxrefs[:]
+        if isinstance(features, list):
+            answer.features = features
+        elif features:
+            # Copy the old features, adjusting location and strand
+            length = len(answer)
+            answer.features = [f._flip(length) for f in self.features]
+            # The old list should have been sorted by start location,
+            # reversing it will leave it sorted by what is now the end position,
+            # so we need to resort in case of overlapping features.
+            # NOTE - In the common case of gene before CDS (and similar) with
+            # the exact same locations, this will still maintain gene before CDS
+            answer.features.sort(key=lambda x: x.location.start.position)
+        if isinstance(annotations, dict):
+            answer.annotations = annotations
+        elif annotations:
+            # Copy the old annotations,
+            answer.annotations = self.annotations.copy()
+        if isinstance(letter_annotations, dict):
+            answer.letter_annotations = letter_annotations
+        elif letter_annotations:
+            # Copy the old per letter annotations, reversing them
+            for key, value in self.letter_annotations.items():
+                answer._per_letter_annotations[key] = value[::-1]
+        return answer
+
+    def translate(
+        self,
+        # Seq translation arguments:
+        table="Standard",
+        stop_symbol="*",
+        to_stop=False,
+        cds=False,
+        gap=None,
+        # SeqRecord annotation arguments:
+        id=False,
+        name=False,
+        description=False,
+        features=False,
+        annotations=False,
+        letter_annotations=False,
+        dbxrefs=False,
+    ):
+        """Return new SeqRecord with translated sequence.
+
+        This calls the record's .seq.translate() method (which describes
+        the translation-related arguments, like table for the genetic code).
+
+        By default the new record does NOT preserve the sequence identifier,
+        name, description, general annotation or database cross-references -
+        these are unlikely to apply to the translated sequence.
+
+        You can specify the returned record's id, name and description as
+        strings, or True to keep that of the parent, or False for a default.
+
+        You can specify the returned record's features with a list of
+        SeqFeature objects, or False (default) to omit them.
+
+        You can also specify both the returned record's annotations and
+        letter_annotations as dictionaries, True to keep that of the parent
+        (annotations only), or False (default) to omit them.
+
+        e.g. Loading a FASTA gene and translating it,
+
+        >>> from Bio import SeqIO
+        >>> gene_record = SeqIO.read("Fasta/sweetpea.nu", "fasta")
+        >>> print(gene_record.format("fasta"))
+        >gi|3176602|gb|U78617.1|LOU78617 Lathyrus odoratus phytochrome A (PHYA) gene, partial cds
+        CAGGCTGCGCGGTTTCTATTTATGAAGAACAAGGTCCGTATGATAGTTGATTGTCATGCA
+        AAACATGTGAAGGTTCTTCAAGACGAAAAACTCCCATTTGATTTGACTCTGTGCGGTTCG
+        ACCTTAAGAGCTCCACATAGTTGCCATTTGCAGTACATGGCTAACATGGATTCAATTGCT
+        TCATTGGTTATGGCAGTGGTCGTCAATGACAGCGATGAAGATGGAGATAGCCGTGACGCA
+        GTTCTACCACAAAAGAAAAAGAGACTTTGGGGTTTGGTAGTTTGTCATAACACTACTCCG
+        AGGTTTGTT
+        <BLANKLINE>
+
+        And now translating the record, specifying the new ID and description:
+
+        >>> protein_record = gene_record.translate(table=11,
+        ...                                        id="phya",
+        ...                                        description="translation")
+        >>> print(protein_record.format("fasta"))
+        >phya translation
+        QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIA
+        SLVMAVVVNDSDEDGDSRDAVLPQKKKRLWGLVVCHNTTPRFV
+        <BLANKLINE>
+
+        """
+        if "protein" == self.annotations.get("molecule_type", ""):
+            raise ValueError("Proteins cannot be translated!")
+        answer = SeqRecord(
+            self.seq.translate(
+                table=table, stop_symbol=stop_symbol, to_stop=to_stop, cds=cds, gap=gap
+            )
+        )
+        if isinstance(id, str):
+            answer.id = id
+        elif id:
+            answer.id = self.id
+        if isinstance(name, str):
+            answer.name = name
+        elif name:
+            answer.name = self.name
+        if isinstance(description, str):
+            answer.description = description
+        elif description:
+            answer.description = self.description
+        if isinstance(dbxrefs, list):
+            answer.dbxrefs = dbxrefs
+        elif dbxrefs:
+            # Copy the old dbxrefs
+            answer.dbxrefs = self.dbxrefs[:]
+        if isinstance(features, list):
+            answer.features = features
+        elif features:
+            # Does not make sense to copy the old features as their locations would be wrong
+            raise TypeError("Unexpected features argument %r" % features)
+        if isinstance(annotations, dict):
+            answer.annotations = annotations
+        elif annotations:
+            # Copy the old annotations
+            answer.annotations = self.annotations.copy()
+        # Set/update to protein:
+        answer.annotations["molecule_type"] = "protein"
+        if isinstance(letter_annotations, dict):
+            answer.letter_annotations = letter_annotations
+        elif letter_annotations:
+            # Does not make sense to copy these as the length is now wrong
+            raise TypeError(
+                "Unexpected letter_annotations argument %r" % letter_annotations
+            )
+        return answer
+
+
+if __name__ == "__main__":
+    from Bio._utils import run_doctest
+
+    run_doctest()
diff --git a/code/lib/Bio/SeqUtils/CheckSum.py b/code/lib/Bio/SeqUtils/CheckSum.py
new file mode 100644
index 0000000..73f3e72
--- /dev/null
+++ b/code/lib/Bio/SeqUtils/CheckSum.py
@@ -0,0 +1,145 @@
+# Copyright 2002 by Yves Bastide and Brad Chapman.
+# Copyright 2007 by Sebastian Bassi
+# All rights reserved.
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Functions to calculate assorted sequence checksums."""
+
+# crc32, crc64, gcg, and seguid
+# crc64 is adapted from BioPerl
+
+
+import binascii
+
+
+def crc32(seq):
+    """Return the crc32 checksum for a sequence (string or Seq object).
+
+    Note that the case is important:
+
+    >>> crc32("ACGTACGTACGT")
+    20049947
+    >>> crc32("acgtACGTacgt")
+    1688586483
+
+    """
+    try:
+        # Assume it's a Seq object
+        s = bytes(seq)
+    except TypeError:
+        # Assume it's a string
+        s = seq.encode()
+    return binascii.crc32(s)
+
+
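+# _init_table_h precomputes the 256-entry lookup table for the high 32 bits
+# of a CRC-64 implemented as two 32-bit halves. 0xD8000000 is the high word
+# of the bit-reversed generator polynomial used by BioPerl's SWISS-PROT
+# style CRC-64, from which this code is adapted; because that polynomial has
+# no bits in the low word, the corresponding low-half table would be all
+# zeros and is simply omitted (editor's note).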
+def _init_table_h():
+    _table_h = []
+    for i in range(256):
+        part_l = i
+        part_h = 0
+        for j in range(8):
+            rflag = part_l & 1
+            part_l >>= 1
+            if part_h & 1:
+                part_l |= 1 << 31
+            part_h >>= 1
+            if rflag:
+                part_h ^= 0xD8000000
+        _table_h.append(part_h)
+    return _table_h
+
+
+# Initialisation
+_table_h = _init_table_h()
+
+
+def crc64(s):
+    """Return the crc64 checksum for a sequence (string or Seq object).
+
+    Note that the case is important:
+
+    >>> crc64("ACGTACGTACGT")
+    'CRC-C4FBB762C4A87EBD'
+    >>> crc64("acgtACGTacgt")
+    'CRC-DA4509DC64A87EBD'
+
+    """
+    crcl = 0
+    crch = 0
+    for c in s:
+        shr = (crch & 0xFF) << 24
+        temp1h = crch >> 8
+        temp1l = (crcl >> 8) | shr
+        idx = (crcl ^ ord(c)) & 0xFF
+        crch = temp1h ^ _table_h[idx]
+        crcl = temp1l
+
+    return "CRC-%08X%08X" % (crch, crcl)
+
+
+def gcg(seq):
+    """Return the GCG checksum (int) for a sequence (string or Seq object).
+
+    Given a nucleotide or amino-acid sequence (or any string),
+    returns the GCG checksum (int), as used by the GCG program suite.
+
+    Based on BioPerl GCG_checksum. Adapted by Sebastian Bassi
+    with the help of John Lenton, Pablo Ziliani, and Gabriel Genellina.
+
+    All sequences are converted to uppercase.
+
+    >>> gcg("ACGTACGTACGT")
+    5688
+    >>> gcg("acgtACGTacgt")
+    5688
+
+    """
+    index = checksum = 0
+    for char in seq:
+        index += 1
+        checksum += index * ord(char.upper())
+        if index == 57:
+            index = 0
+    return checksum % 10000
+
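+# Worked example (editor's sketch): for the sequence "AC" the running sum is
+# 1*ord("A") + 2*ord("C") = 65 + 134 = 199, so gcg("AC") == 199 % 10000 == 199.
+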
+
+def seguid(seq):
+    """Return the SEGUID (string) for a sequence (string or Seq object).
+
+    Given a nucleotide or amino-acid sequence (or any string),
+    returns the SEGUID string (a SEquence Globally Unique IDentifier).
+
+    Note that the case is not important:
+
+    >>> seguid("ACGTACGTACGT")
+    'If6HIvcnRSQDVNiAoefAzySc6i4'
+    >>> seguid("acgtACGTacgt")
+    'If6HIvcnRSQDVNiAoefAzySc6i4'
+
+    For more information about SEGUID, see:
+    http://bioinformatics.anl.gov/seguid/
+    https://doi.org/10.1002/pmic.200600032
+    """
+    import hashlib
+    import base64
+
+    m = hashlib.sha1()
+    try:
+        # Assume it's a Seq object
+        seq = bytes(seq)
+    except TypeError:
+        # Assume it's a string
+        seq = seq.encode()
+    m.update(seq.upper())
+    tmp = base64.encodebytes(m.digest())
+    return tmp.decode().replace("\n", "").rstrip("=")
+
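+# Conceptually, seguid() is the base64-encoded SHA-1 digest of the upper-case
+# sequence with the trailing "=" padding removed, i.e. roughly (editor's
+# sketch of the code above):
+#   base64.b64encode(hashlib.sha1(b"ACGTACGTACGT").digest()).decode().rstrip("=")
+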
+
+if __name__ == "__main__":
+    from Bio._utils import run_doctest
+
+    run_doctest()
diff --git a/code/lib/Bio/SeqUtils/CodonUsage.py b/code/lib/Bio/SeqUtils/CodonUsage.py
new file mode 100644
index 0000000..c4e95e0
--- /dev/null
+++ b/code/lib/Bio/SeqUtils/CodonUsage.py
@@ -0,0 +1,187 @@
+# Copyright 2003 Yair Benita.  All rights reserved.
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Methods for codon usage calculations."""
+
+import math
+from .CodonUsageIndices import SharpEcoliIndex
+from Bio import SeqIO  # To parse a FASTA file
+
+# Turn black code style off
+# fmt: off
+
+CodonsDict = {
+    "TTT": 0, "TTC": 0, "TTA": 0, "TTG": 0,
+    "CTT": 0, "CTC": 0, "CTA": 0, "CTG": 0,
+    "ATT": 0, "ATC": 0, "ATA": 0, "ATG": 0,
+    "GTT": 0, "GTC": 0, "GTA": 0, "GTG": 0,
+    "TAT": 0, "TAC": 0, "TAA": 0, "TAG": 0,
+    "CAT": 0, "CAC": 0, "CAA": 0, "CAG": 0,
+    "AAT": 0, "AAC": 0, "AAA": 0, "AAG": 0,
+    "GAT": 0, "GAC": 0, "GAA": 0, "GAG": 0,
+    "TCT": 0, "TCC": 0, "TCA": 0, "TCG": 0,
+    "CCT": 0, "CCC": 0, "CCA": 0, "CCG": 0,
+    "ACT": 0, "ACC": 0, "ACA": 0, "ACG": 0,
+    "GCT": 0, "GCC": 0, "GCA": 0, "GCG": 0,
+    "TGT": 0, "TGC": 0, "TGA": 0, "TGG": 0,
+    "CGT": 0, "CGC": 0, "CGA": 0, "CGG": 0,
+    "AGT": 0, "AGC": 0, "AGA": 0, "AGG": 0,
+    "GGT": 0, "GGC": 0, "GGA": 0, "GGG": 0}
+
+# Turn black code style on
+# fmt: on
+
+
+# this dictionary shows which codons encode the same AA
+SynonymousCodons = {
+    "CYS": ["TGT", "TGC"],
+    "ASP": ["GAT", "GAC"],
+    "SER": ["TCT", "TCG", "TCA", "TCC", "AGC", "AGT"],
+    "GLN": ["CAA", "CAG"],
+    "MET": ["ATG"],
+    "ASN": ["AAC", "AAT"],
+    "PRO": ["CCT", "CCG", "CCA", "CCC"],
+    "LYS": ["AAG", "AAA"],
+    "STOP": ["TAG", "TGA", "TAA"],
+    "THR": ["ACC", "ACA", "ACG", "ACT"],
+    "PHE": ["TTT", "TTC"],
+    "ALA": ["GCA", "GCC", "GCG", "GCT"],
+    "GLY": ["GGT", "GGG", "GGA", "GGC"],
+    "ILE": ["ATC", "ATA", "ATT"],
+    "LEU": ["TTA", "TTG", "CTC", "CTT", "CTG", "CTA"],
+    "HIS": ["CAT", "CAC"],
+    "ARG": ["CGA", "CGC", "CGG", "CGT", "AGG", "AGA"],
+    "TRP": ["TGG"],
+    "VAL": ["GTA", "GTC", "GTG", "GTT"],
+    "GLU": ["GAG", "GAA"],
+    "TYR": ["TAT", "TAC"],
+}
+
+
+class CodonAdaptationIndex:
+    """A codon adaptation index (CAI) implementation.
+
+    Implements the codon adaptation index (CAI) described by Sharp and
+    Li (Nucleic Acids Res. 1987 Feb 11;15(3):1281-95).
+
+    NOTE - This implementation does not currently cope with alternative genetic
+    codes: only the synonymous codons in the standard table are considered.
+    """
+
+    def __init__(self):
+        """Initialize the class."""
+        self.index = {}
+        self.codon_count = {}
+
+    # Use this method with a predefined CAI index.
+    def set_cai_index(self, index):
+        """Set up an index to be used when calculating CAI for a gene.
+
+        Just pass a dictionary similar to the SharpEcoliIndex in the
+        CodonUsageIndices module.
+        """
+        self.index = index
+
+    def generate_index(self, fasta_file):
+        """Generate a codon usage index from a FASTA file of CDS sequences.
+
+        Takes the location of a FASTA file containing CDS sequences
+        (which must all have a whole number of codons) and generates a
+        codon usage index based on RSCU values (Relative Synonymous
+        Codon Usage).
+        """
+        # first make sure we're not overwriting an existing index:
+        if self.index != {} or self.codon_count != {}:
+            raise ValueError(
+                "an index has already been set or a codon count "
+                "has been done. Cannot overwrite either."
+            )
+
+        # count codon occurrences in the file.
+        self._count_codons(fasta_file)
+
+        # now to calculate the index we first need to sum the number of times
+        # synonymous codons were used all together.
+        for aa in SynonymousCodons:
+            total = 0.0
+            # RSCU values are CodonCount / ((1 / num of synonymous codons) *
+            # sum of all synonymous codons)
+            rcsu = []
+            codons = SynonymousCodons[aa]
+
+            for codon in codons:
+                total += self.codon_count[codon]
+
+            # calculate the RSCU value for each of the codons
+            for codon in codons:
+                denominator = float(total) / len(codons)
+                rcsu.append(self.codon_count[codon] / denominator)
+
+            # now generate the index W = RSCU_i / RSCU_max:
+            rcsu_max = max(rcsu)
+            for codon_index, codon in enumerate(codons):
+                self.index[codon] = rcsu[codon_index] / rcsu_max
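+        # Worked example (editor's sketch): for a two-codon amino acid counted
+        # 30 and 10 times, total = 40.0 and the denominator is 40.0 / 2 = 20.0,
+        # so the RSCU values are [1.5, 0.5]; with rcsu_max = 1.5 the final
+        # index values W are [1.0, 0.333...].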
+
+    def cai_for_gene(self, dna_sequence):
+        """Calculate the CAI (float) for the provided DNA sequence (string).
+
+        This method uses the Index (either the one you set or the one you
+        generated) and returns the CAI for the DNA sequence.
+        """
+        cai_value, cai_length = 0, 0
+
+        # if no index is set or generated, the default SharpEcoliIndex will
+        # be used.
+        if self.index == {}:
+            self.set_cai_index(SharpEcoliIndex)
+
+        if dna_sequence.islower():
+            dna_sequence = dna_sequence.upper()
+
+        for i in range(0, len(dna_sequence), 3):
+            codon = dna_sequence[i : i + 3]
+            if codon in self.index:
+                # ATG and TGG always have an index value of 1, exclude them:
+                if codon not in ["ATG", "TGG"]:
+                    cai_value += math.log(self.index[codon])
+                    cai_length += 1
+            # some indices may not include stop codons:
+            elif codon not in ["TGA", "TAA", "TAG"]:
+                raise TypeError(
+                    "illegal codon in sequence: %s.\n%s" % (codon, self.index)
+                )
+
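+        # CAI is essentially the geometric mean of the index values w of the
+        # counted codons, computed as exp(mean of ln(w)); note that this
+        # implementation divides the log sum by (cai_length - 1.0).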
+        return math.exp(cai_value / (cai_length - 1.0))
+
+    def _count_codons(self, fasta_file):
+        with open(fasta_file) as handle:
+
+            # make the codon dictionary local
+            self.codon_count = CodonsDict.copy()
+
+            # iterate over the sequences and count all the codons in the FASTA file.
+            for cur_record in SeqIO.parse(handle, "fasta"):
+                # make sure the sequence is upper case
+                if str(cur_record.seq).islower():
+                    dna_sequence = str(cur_record.seq).upper()
+                else:
+                    dna_sequence = str(cur_record.seq)
+                for i in range(0, len(dna_sequence), 3):
+                    codon = dna_sequence[i : i + 3]
+                    if codon in self.codon_count:
+                        self.codon_count[codon] += 1
+                    else:
+                        raise TypeError(
+                            "illegal codon %s in gene: %s" % (codon, cur_record.id)
+                        )
+
+    def print_index(self):
+        """Print out the index used.
+
+        This just prints each codon and its index value, tab separated.
+        """
+        for i in sorted(self.index):
+            print("%s\t%.3f" % (i, self.index[i]))
diff --git a/code/lib/Bio/SeqUtils/CodonUsageIndices.py b/code/lib/Bio/SeqUtils/CodonUsageIndices.py
new file mode 100644
index 0000000..99aa097
--- /dev/null
+++ b/code/lib/Bio/SeqUtils/CodonUsageIndices.py
@@ -0,0 +1,28 @@
+# Copyright 2003 Yair Benita.  All rights reserved.
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Codon adaption indxes, including Sharp and Li (1987) E. coli index.
+
+Currently this module only defines a single codon adaption index from
+Sharp & Li, Nucleic Acids Res. 1987.
+"""
+# Turn black code style off
+# fmt: off
+
+SharpEcoliIndex = {
+    "GCA": 0.586, "GCC": 0.122, "GCG": 0.424, "GCT": 1, "AGA": 0.004,
+    "AGG": 0.002, "CGA": 0.004, "CGC": 0.356, "CGG": 0.004, "CGT": 1, "AAC": 1,
+    "AAT": 0.051, "GAC": 1, "GAT": 0.434, "TGC": 1, "TGT": 0.5, "CAA": 0.124,
+    "CAG": 1, "GAA": 1, "GAG": 0.259, "GGA": 0.01, "GGC": 0.724, "GGG": 0.019,
+    "GGT": 1, "CAC": 1, "CAT": 0.291, "ATA": 0.003, "ATC": 1, "ATT": 0.185,
+    "CTA": 0.007, "CTC": 0.037, "CTG": 1, "CTT": 0.042, "TTA": 0.02,
+    "TTG": 0.02, "AAA": 1, "AAG": 0.253, "ATG": 1, "TTC": 1, "TTT": 0.296,
+    "CCA": 0.135, "CCC": 0.012, "CCG": 1, "CCT": 0.07, "AGC": 0.41,
+    "AGT": 0.085, "TCA": 0.077, "TCC": 0.744, "TCG": 0.017, "TCT": 1,
+    "ACA": 0.076, "ACC": 1, "ACG": 0.099, "ACT": 0.965, "TGG": 1, "TAC": 1,
+    "TAT": 0.239, "GTA": 0.495, "GTC": 0.066, "GTG": 0.221, "GTT": 1}
+
+# Turn black code style on
+# fmt: on
diff --git a/code/lib/Bio/SeqUtils/IsoelectricPoint.py b/code/lib/Bio/SeqUtils/IsoelectricPoint.py
new file mode 100644
index 0000000..f4090f8
--- /dev/null
+++ b/code/lib/Bio/SeqUtils/IsoelectricPoint.py
@@ -0,0 +1,161 @@
+# Copyright 2003 Yair Benita.  All rights reserved.
+# Revisions copyright 2020 by Tianyi Shi.  All rights reserved.
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Calculate isoelectric points of polypeptides using methods of Bjellqvist.
+
+pK values and the methods are taken from::
+
+    * Bjellqvist, B.,Hughes, G.J., Pasquali, Ch., Paquet, N., Ravier, F.,
+    Sanchez, J.-Ch., Frutiger, S. & Hochstrasser, D.F.
+    The focusing positions of polypeptides in immobilized pH gradients can be
+    predicted from their amino acid sequences. Electrophoresis 1993, 14,
+    1023-1031.
+
+    * Bjellqvist, B., Basse, B., Olsen, E. and Celis, J.E.
+    Reference points for comparisons of two-dimensional maps of proteins from
+    different human cell types defined in a pH scale where isoelectric points
+    correlate with polypeptide compositions. Electrophoresis 1994, 15, 529-539.
+
+I designed the algorithm according to a note by David L. Tabb, available at:
+http://fields.scripps.edu/DTASelect/20010710-pI-Algorithm.pdf
+"""
+
+positive_pKs = {"Nterm": 7.5, "K": 10.0, "R": 12.0, "H": 5.98}
+negative_pKs = {"Cterm": 3.55, "D": 4.05, "E": 4.45, "C": 9.0, "Y": 10.0}
+pKcterminal = {"D": 4.55, "E": 4.75}
+pKnterminal = {
+    "A": 7.59,
+    "M": 7.0,
+    "S": 6.93,
+    "P": 8.36,
+    "T": 6.82,
+    "V": 7.44,
+    "E": 7.7,
+}
+charged_aas = ("K", "R", "H", "D", "E", "C", "Y")
+
+
+class IsoelectricPoint:
+    """A class for calculating the IEP or charge at given pH of a protein.
+
+    Parameters
+    ----------
+    :protein_sequence: A ``Bio.Seq`` or string object containing a protein
+                       sequence.
+    :aa_content: A dictionary with amino acid letters as keys and their
+                 occurrences as integers, e.g. ``{"A": 3, "C": 0, ...}``.
+                 Default: ``None``. If ``None``, the dict will be calculated
+                 from the given sequence.
+
+    Methods
+    -------
+    :charge_at_pH(pH):  Calculates the charge of the protein for a given pH
+    :pi():              Calculates the isoelectric point
+
+
+    Examples
+    --------
+    The methods of this class can either be accessed from the class itself
+    or from a ``ProtParam.ProteinAnalysis`` object (with partially different
+    names):
+
+    >>> from Bio.SeqUtils.IsoelectricPoint import IsoelectricPoint as IP
+    >>> protein = IP("INGAR")
+    >>> print(f"IEP of peptide {protein.sequence} is {protein.pi():.2f}")
+    IEP of peptide INGAR is 9.75
+    >>> print(f"Its charge at pH 7 is {protein.charge_at_pH(7.0):.2f}")
+    Its charge at pH 7 is 0.76
+
+
+    >>> from Bio.SeqUtils.ProtParam import ProteinAnalysis as PA
+    >>> protein = PA("PETER")
+    >>> print(f"IEP of {protein.sequence}: {protein.isoelectric_point():.2f}")
+    IEP of PETER: 4.53
+    >>> print(f"Charge at pH 4.53: {protein.charge_at_pH(4.53):.2f}")
+    Charge at pH 4.53: 0.00
+
+    """
+
+    def __init__(self, protein_sequence, aa_content=None):
+        """Initialize the class."""
+        self.sequence = str(protein_sequence).upper()
+        if not aa_content:
+            from Bio.SeqUtils.ProtParam import ProteinAnalysis as _PA
+
+            aa_content = _PA(self.sequence).count_amino_acids()
+        self.charged_aas_content = self._select_charged(aa_content)
+
+        self.pos_pKs, self.neg_pKs = self._update_pKs_tables()
+
+    # This function creates a dictionary with the contents of each charged aa,
+    # plus Cterm and Nterm.
+    def _select_charged(self, aa_content):
+        charged = {}
+        for aa in charged_aas:
+            charged[aa] = float(aa_content[aa])
+        charged["Nterm"] = 1.0
+        charged["Cterm"] = 1.0
+        return charged
+
+    def _update_pKs_tables(self):
+        """Update pKs tables with seq specific values for N- and C-termini."""
+        pos_pKs = positive_pKs.copy()
+        neg_pKs = negative_pKs.copy()
+        nterm, cterm = self.sequence[0], self.sequence[-1]
+        if nterm in pKnterminal:
+            pos_pKs["Nterm"] = pKnterminal[nterm]
+        if cterm in pKcterminal:
+            neg_pKs["Cterm"] = pKcterminal[cterm]
+        return pos_pKs, neg_pKs
+
+    def charge_at_pH(self, pH):
+        """Calculate the charge of a protein at given pH."""
+        # derivation:
+        #   Henderson Hasselbalch equation: pH = pKa + log([A-]/[HA])
+        #   Rearranging: [HA]/[A-] = 10 ** (pKa - pH)
+        #   partial_charge =
+        #       [A-]/[A]total = [A-]/([A-] + [HA]) = 1 / { ([A-] + [HA])/[A-] } =
+        #       1 / (1 + [HA]/[A-]) = 1 / (1 + 10 ** (pKa - pH)) for acidic residues;
+        #                             1 / (1 + 10 ** (pH - pKa)) for basic residues
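+        # Worked example (illustrative, added note): a lone histidine
+        # (pK 5.98 in the table above) at pH 7.0 carries a partial positive
+        # charge of 1 / (1 + 10 ** (7.0 - 5.98)) ~ 0.087, i.e. roughly one
+        # pH unit above its pK the residue is mostly deprotonated.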
+        positive_charge = 0.0
+        for aa, pK in self.pos_pKs.items():
+            partial_charge = 1.0 / (10 ** (pH - pK) + 1.0)
+            positive_charge += self.charged_aas_content[aa] * partial_charge
+
+        negative_charge = 0.0
+        for aa, pK in self.neg_pKs.items():
+            partial_charge = 1.0 / (10 ** (pK - pH) + 1.0)
+            negative_charge += self.charged_aas_content[aa] * partial_charge
+
+        return positive_charge - negative_charge
+
+    # This is the action function; it tries different pH values until the
+    # charge of the protein is 0 (or close).
+    def pi(self, pH=7.775, min_=4.05, max_=12):
+        r"""Calculate and return the isoelectric point as float.
+
+        This is a recursive function that uses the bisection method.
+        Wiki on bisection: https://en.wikipedia.org/wiki/Bisection_method
+
+        Arguments:
+         - pH: the pH at which the current charge of the protein is computed.
+           This pH lies at the centre of the interval (mean of `min_` and `max_`).
+         - min\_: the minimum of the interval. Initial value defaults to 4.05,
+           which is below the theoretical minimum, when the protein is composed
+           exclusively of aspartate.
+         - max\_: the maximum of the interval. Initial value defaults to 12,
+           which is above the theoretical maximum, when the protein is composed
+           exclusively of arginine.
+        """
+        charge = self.charge_at_pH(pH)
+        if max_ - min_ > 0.0001:
+            if charge > 0.0:
+                min_ = pH
+            else:
+                max_ = pH
+            next_pH = (min_ + max_) / 2
+            return self.pi(next_pH, min_, max_)
+        return pH
diff --git a/code/lib/Bio/SeqUtils/MeltingTemp.py b/code/lib/Bio/SeqUtils/MeltingTemp.py
new file mode 100644
index 0000000..90c47ae
--- /dev/null
+++ b/code/lib/Bio/SeqUtils/MeltingTemp.py
@@ -0,0 +1,1136 @@
+# Copyright 2004-2008 by Sebastian Bassi.
+# Copyright 2013-2018 by Markus Piotrowski.
+# All rights reserved.
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Calculate the melting temperature of nucleotide sequences.
+
+This module contains three different methods to calculate the melting
+temperature of oligonucleotides:
+
+1. Tm_Wallace: 'Rule of thumb'
+2. Tm_GC: Empirical formulas based on GC content. Salt and mismatch corrections
+   can be included.
+3. Tm_NN: Calculation based on nearest neighbor thermodynamics. Several tables
+   for DNA/DNA, DNA/RNA and RNA/RNA hybridizations are included.
+   Correction for mismatches, dangling ends, salt concentration and other
+   additives are available.
+
+Tm_staluc is the 'old' NN calculation and is kept for compatibility. It is,
+however, recommended to use Tm_NN instead, since Tm_staluc may be deprecated
+in the future. Also, Tm_NN has many more options. Using Tm_staluc and Tm_NN
+with default parameters gives (essentially) the same results.
+
+General parameters for most Tm methods:
+ - seq -- A Biopython sequence object or a string.
+ - check -- Checks if the sequence is valid for the given method (default=
+   True). In general, whitespaces and non-base characters are removed and
+   characters are converted to uppercase. RNA will be backtranscribed.
+ - strict -- Do not allow base characters or neighbor duplex keys (e.g.
+   'AT/NA') that cannot be evaluated, or cannot be evaluated unambiguously,
+   for the respective method (default=True). Note that W (= A or T) and
+   S (= C or G) are not ambiguous for Tm_Wallace and Tm_GC. If 'False',
+   average values (if applicable) will be used.
+
+This module is not able to detect self-complementary sequences and it will
+not use alignment tools to align an oligonucleotide sequence to its target
+sequence. Thus it cannot detect dangling ends and mismatches by itself (don't
+even think about bulges and loops). These parameters have to be handed over
+to the respective method.
+
+Other public methods of this module:
+ - make_table     : To create a table with thermodynamic data.
+ - salt_correction: To adjust Tm to a given salt concentration by different
+   formulas. This method is called from Tm_GC and Tm_NN but may
+   also be accessed 'manually'. It returns a correction term, not
+   a corrected Tm!
+ - chem_correction: To adjust Tm regarding the chemical additives DMSO and
+   formaldehyde. The method returns a corrected Tm. Chemical
+   correction is not an integral part of the Tm methods and must
+   be called additionally.
+
+For example:
+
+    >>> from Bio.SeqUtils import MeltingTemp as mt
+    >>> from Bio.Seq import Seq
+    >>> mystring = 'CGTTCCAAAGATGTGGGCATGAGCTTAC'
+    >>> myseq = Seq(mystring)
+    >>> print('%0.2f' % mt.Tm_Wallace(mystring))
+    84.00
+    >>> print('%0.2f' % mt.Tm_Wallace(myseq))
+    84.00
+    >>> print('%0.2f' % mt.Tm_GC(myseq))
+    58.73
+    >>> print('%0.2f' % mt.Tm_NN(myseq))
+    60.32
+
+Tm_NN with default values gives the same result as the 'old' Tm_staluc.
+However, values differ for RNA, since Tm_staluc had some errors for RNA
+calculation. These errors have been fixed in this version.
+
+New Tm_NN can do slightly more:
+Using different thermodynamic tables, e.g. from Breslauer '86 or Sugimoto '96:
+
+    >>> print('%0.2f' % mt.Tm_NN(myseq, nn_table=mt.DNA_NN1))  # Breslauer '86
+    72.19
+    >>> print('%0.2f' % mt.Tm_NN(myseq, nn_table=mt.DNA_NN2))  # Sugimoto '96
+    65.47
+
+Tables for RNA and RNA/DNA hybrids are included:
+
+    >>> print('%0.2f' % mt.Tm_NN(myseq, nn_table=mt.RNA_NN1))  # Freier '86
+    73.35
+    >>> print('%0.2f' % mt.Tm_NN(myseq, nn_table=mt.R_DNA_NN1))  # Sugimoto '95
+    58.45
+
+Several types of salt correction (for Tm_NN and Tm_GC):
+
+    >>> for i in range(1, 8):
+    ...     print("Type: %d, Tm: %0.2f" % (i, Tm_NN(myseq, saltcorr=i)))
+    ...
+    Type: 1, Tm: 54.27
+    Type: 2, Tm: 54.02
+    Type: 3, Tm: 59.60
+    Type: 4, Tm: 60.64
+    Type: 5, Tm: 60.32
+    Type: 6, Tm: 59.78
+    Type: 7, Tm: 59.78
+
+Correction for other monovalent cations (K+, Tris), Mg2+ and dNTPs according
+to von Ahsen et al. (2001) or Owczarzy et al. (2008) (for Tm_NN and Tm_GC):
+
+    >>> print('%0.2f' % mt.Tm_NN(myseq, Na=50, Tris=10))
+    60.79
+    >>> print('%0.2f' % mt.Tm_NN(myseq, Na=50, Tris=10, Mg=1.5))
+    67.39
+    >>> print('%0.2f' % mt.Tm_NN(myseq, Na=50, Tris=10, Mg=1.5, saltcorr=7))
+    66.81
+    >>> print('%0.2f' % mt.Tm_NN(myseq, Na=50, Tris=10, Mg=1.5, dNTPs=0.6,
+    ...                          saltcorr=7))
+    66.04
+
+Dangling ends and mismatches, e.g.::
+
+    Oligo:     CGTTCCaAAGATGTGGGCATGAGCTTAC       CGTTCCaAAGATGTGGGCATGAGCTTAC
+               ::::::X:::::::::::::::::::::  or   ::::::X:::::::::::::::::::::
+    Template:  GCAAGGcTTCTACACCCGTACTCGAATG      TGCAAGGcTTCTACACCCGTACTCGAATGC
+
+Here:
+
+    >>> print('%0.2f' % mt.Tm_NN('CGTTCCAAAGATGTGGGCATGAGCTTAC'))
+    60.32
+    >>> print('%0.2f' % mt.Tm_NN('CGTTCCAAAGATGTGGGCATGAGCTTAC',
+    ...                    c_seq='GCAAGGcTTCTACACCCGTACTCGAATG'))
+    55.39
+    >>> print('%0.2f' % mt.Tm_NN('CGTTCCAAAGATGTGGGCATGAGCTTAC', shift=1,
+    ...                   c_seq='TGCAAGGcTTCTACACCCGTACTCGAATGC'))
+    55.69
+
+The same for RNA:
+
+    >>> print('%0.2f' % mt.Tm_NN('CGUUCCAAAGAUGUGGGCAUGAGCUUAC',
+    ...                   c_seq='UGCAAGGcUUCUACACCCGUACUCGAAUGC',
+    ...                   shift=1, nn_table=mt.RNA_NN3,
+    ...                   de_table=mt.RNA_DE1))
+    73.00
+
+Note that thermodynamic data are not available for all kinds of mismatches,
+e.g. most double mismatches or terminal mismatches combined with dangling ends:
+
+    >>> print('%0.2f' % mt.Tm_NN('CGTTCCAAAGATGTGGGCATGAGCTTAC',
+    ...                   c_seq='TtCAAGGcTTCTACACCCGTACTCGAATGC',
+    ...                   shift=1))
+    Traceback (most recent call last):
+    ValueError: no thermodynamic data for neighbors '.C/TT' available
+
+Make your own tables, or update/extend existing tables. E.g., add values for
+locked nucleotides. Here, 'locked A' (and its complement) should be represented
+by '1':
+
+    >>> mytable = mt.make_table(oldtable=mt.DNA_NN3,
+    ...                         values={'A1/T1':(-6.608, -17.235),
+    ...                         '1A/1T':(-6.893, -15.923)})
+    >>> print('%0.2f' % mt.Tm_NN('CGTTCCAAAGATGTGGGCATGAGCTTAC'))
+    60.32
+    >>> print('%0.2f' % mt.Tm_NN('CGTTCCA1AGATGTGGGCATGAGCTTAC',
+    ...                           nn_table=mytable, check=False))
+    ... # 'check' must be False, otherwise '1' would be discarded
+    62.53
+
+"""
+
+
+import math
+import warnings
+
+from Bio import SeqUtils, Seq
+from Bio import BiopythonWarning
+from Bio import BiopythonDeprecationWarning
+
+
+# Thermodynamic lookup tables (dictionaries):
+# Enthalpy (dH) and entropy (dS) values for nearest neighbors and initiation
+# process. Calculation of duplex initiation is quite different in several
+# papers; to allow for a general calculation, all different initiation
+# parameters are included in all tables and non-applicable parameters are set
+# to zero.
+# The key is either an initiation type (e.g., 'init_A/T') or a nearest neighbor
+# duplex sequence (e.g., GT/CA, to read 5'GT3'-3'CA5'). The values are tuples
+# of dH (kcal/mol), dS (cal/mol K).
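+# For example, DNA_NN3["AA/TT"] == (-7.9, -22.2) below means dH = -7.9
+# kcal/mol and dS = -22.2 cal/(mol K) for the duplex step 5'AA3'/3'TT5'.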
+
+# Turn black code style off
+# fmt: off
+
+# DNA/DNA
+# Breslauer et al. (1986), Proc Natl Acad Sci USA 83: 3746-3750
+DNA_NN1 = {
+    "init": (0, 0), "init_A/T": (0, 0), "init_G/C": (0, 0),
+    "init_oneG/C": (0, -16.8), "init_allA/T": (0, -20.1), "init_5T/A": (0, 0),
+    "sym": (0, -1.3),
+    "AA/TT": (-9.1, -24.0), "AT/TA": (-8.6, -23.9), "TA/AT": (-6.0, -16.9),
+    "CA/GT": (-5.8, -12.9), "GT/CA": (-6.5, -17.3), "CT/GA": (-7.8, -20.8),
+    "GA/CT": (-5.6, -13.5), "CG/GC": (-11.9, -27.8), "GC/CG": (-11.1, -26.7),
+    "GG/CC": (-11.0, -26.6)}
+
+# Sugimoto et al. (1996), Nuc Acids Res 24 : 4501-4505
+DNA_NN2 = {
+    "init": (0.6, -9.0), "init_A/T": (0, 0), "init_G/C": (0, 0),
+    "init_oneG/C": (0, 0), "init_allA/T": (0, 0), "init_5T/A": (0, 0),
+    "sym": (0, -1.4),
+    "AA/TT": (-8.0, -21.9), "AT/TA": (-5.6, -15.2), "TA/AT": (-6.6, -18.4),
+    "CA/GT": (-8.2, -21.0), "GT/CA": (-9.4, -25.5), "CT/GA": (-6.6, -16.4),
+    "GA/CT": (-8.8, -23.5), "CG/GC": (-11.8, -29.0), "GC/CG": (-10.5, -26.4),
+    "GG/CC": (-10.9, -28.4)}
+
+# Allawi and SantaLucia (1997), Biochemistry 36: 10581-10594
+DNA_NN3 = {
+    "init": (0, 0), "init_A/T": (2.3, 4.1), "init_G/C": (0.1, -2.8),
+    "init_oneG/C": (0, 0), "init_allA/T": (0, 0), "init_5T/A": (0, 0),
+    "sym": (0, -1.4),
+    "AA/TT": (-7.9, -22.2), "AT/TA": (-7.2, -20.4), "TA/AT": (-7.2, -21.3),
+    "CA/GT": (-8.5, -22.7), "GT/CA": (-8.4, -22.4), "CT/GA": (-7.8, -21.0),
+    "GA/CT": (-8.2, -22.2), "CG/GC": (-10.6, -27.2), "GC/CG": (-9.8, -24.4),
+    "GG/CC": (-8.0, -19.9)}
+
+# SantaLucia & Hicks (2004), Annu. Rev. Biophys. Biomol. Struct 33: 415-440
+DNA_NN4 = {
+    "init": (0.2, -5.7), "init_A/T": (2.2, 6.9), "init_G/C": (0, 0),
+    "init_oneG/C": (0, 0), "init_allA/T": (0, 0), "init_5T/A": (0, 0),
+    "sym": (0, -1.4),
+    "AA/TT": (-7.6, -21.3), "AT/TA": (-7.2, -20.4), "TA/AT": (-7.2, -20.4),
+    "CA/GT": (-8.5, -22.7), "GT/CA": (-8.4, -22.4), "CT/GA": (-7.8, -21.0),
+    "GA/CT": (-8.2, -22.2), "CG/GC": (-10.6, -27.2), "GC/CG": (-9.8, -24.4),
+    "GG/CC": (-8.0, -19.0)}
+
+# RNA/RNA
+# Freier et al. (1986), Proc Natl Acad Sci USA 83: 9373-9377
+RNA_NN1 = {
+    "init": (0, -10.8), "init_A/T": (0, 0), "init_G/C": (0, 0),
+    "init_oneG/C": (0, 0), "init_allA/T": (0, 0), "init_5T/A": (0, 0),
+    "sym": (0, -1.4),
+    "AA/TT": (-6.6, -18.4), "AT/TA": (-5.7, -15.5), "TA/AT": (-8.1, -22.6),
+    "CA/GT": (-10.5, -27.8), "GT/CA": (-10.2, -26.2), "CT/GA": (-7.6, -19.2),
+    "GA/CT": (-13.3, -35.5), "CG/GC": (-8.0, -19.4), "GC/CG": (-14.2, -34.9),
+    "GG/CC": (-12.2, -29.7)}
+
+# Xia et al (1998), Biochemistry 37: 14719-14735
+RNA_NN2 = {
+    "init": (3.61, -1.5), "init_A/T": (3.72, 10.5), "init_G/C": (0, 0),
+    "init_oneG/C": (0, 0), "init_allA/T": (0, 0), "init_5T/A": (0, 0),
+    "sym": (0, -1.4),
+    "AA/TT": (-6.82, -19.0), "AT/TA": (-9.38, -26.7), "TA/AT": (-7.69, -20.5),
+    "CA/GT": (-10.44, -26.9), "GT/CA": (-11.40, -29.5),
+    "CT/GA": (-10.48, -27.1), "GA/CT": (-12.44, -32.5),
+    "CG/GC": (-10.64, -26.7), "GC/CG": (-14.88, -36.9),
+    "GG/CC": (-13.39, -32.7)}
+
+# Chen et al. (2012), Biochemistry 51: 3508-3522
+RNA_NN3 = {
+    "init": (6.40, 6.99), "init_A/T": (3.85, 11.04), "init_G/C": (0, 0),
+    "init_oneG/C": (0, 0), "init_allA/T": (0, 0), "init_5T/A": (0, 0),
+    "sym": (0, -1.4),
+    "AA/TT": (-7.09, -19.8), "AT/TA": (-9.11, -25.8), "TA/AT": (-8.50, -22.9),
+    "CA/GT": (-11.03, -28.8), "GT/CA": (-11.98, -31.3),
+    "CT/GA": (-10.90, -28.5), "GA/CT": (-13.21, -34.9),
+    "CG/GC": (-10.88, -27.4), "GC/CG": (-16.04, -40.6),
+    "GG/CC": (-14.18, -35.0), "GT/TG": (-13.83, -46.9),
+    "GG/TT": (-17.82, -56.7), "AG/TT": (-3.96, -11.6),
+    "TG/AT": (-0.96, -1.8), "TT/AG": (-10.38, -31.8), "TG/GT": (-12.64, -38.9),
+    "AT/TG": (-7.39, -21.0), "CG/GT": (-5.56, -13.9), "CT/GG": (-9.44, -24.7),
+    "GG/CT": (-7.03, -16.8), "GT/CG": (-11.09, -28.8)}
+
+# RNA/DNA
+# Sugimoto et al. (1995), Biochemistry 34: 11211-11216
+R_DNA_NN1 = {
+    "init": (1.9, -3.9), "init_A/T": (0, 0), "init_G/C": (0, 0),
+    "init_oneG/C": (0, 0), "init_allA/T": (0, 0), "init_5T/A": (0, 0),
+    "sym": (0, 0),
+    "TT/AA": (-11.5, -36.4), "GT/CA": (-7.8, -21.6), "CT/GA": (-7.0, -19.7),
+    "AT/TA": (-8.3, -23.9), "TG/AC": (-10.4, -28.4), "GG/CC": (-12.8, -31.9),
+    "CG/GC": (-16.3, -47.1), "AG/TC": (-9.1, -23.5), "TC/AG": (-8.6, -22.9),
+    "GC/CG": (-8.0, -17.1), "CC/GG": (-9.3, -23.2), "AC/TG": (-5.9, -12.3),
+    "TA/AT": (-7.8, -23.2), "GA/CT": (-5.5, -13.5), "CA/GT": (-9.0, -26.1),
+    "AA/TT": (-7.8, -21.9)}
+
+# Internal mismatch and inosine table (DNA)
+# Allawi & SantaLucia (1997), Biochemistry 36: 10581-10594
+# Allawi & SantaLucia (1998), Biochemistry 37: 9435-9444
+# Allawi & SantaLucia (1998), Biochemistry 37: 2170-2179
+# Allawi & SantaLucia (1998), Nucl Acids Res 26: 2694-2701
+# Peyret et al. (1999), Biochemistry 38: 3468-3477
+# Watkins & SantaLucia (2005), Nucl Acids Res 33: 6258-6267
+DNA_IMM1 = {
+    "AG/TT": (1.0, 0.9), "AT/TG": (-2.5, -8.3), "CG/GT": (-4.1, -11.7),
+    "CT/GG": (-2.8, -8.0), "GG/CT": (3.3, 10.4), "GG/TT": (5.8, 16.3),
+    "GT/CG": (-4.4, -12.3), "GT/TG": (4.1, 9.5), "TG/AT": (-0.1, -1.7),
+    "TG/GT": (-1.4, -6.2), "TT/AG": (-1.3, -5.3), "AA/TG": (-0.6, -2.3),
+    "AG/TA": (-0.7, -2.3), "CA/GG": (-0.7, -2.3), "CG/GA": (-4.0, -13.2),
+    "GA/CG": (-0.6, -1.0), "GG/CA": (0.5, 3.2), "TA/AG": (0.7, 0.7),
+    "TG/AA": (3.0, 7.4),
+    "AC/TT": (0.7, 0.2), "AT/TC": (-1.2, -6.2), "CC/GT": (-0.8, -4.5),
+    "CT/GC": (-1.5, -6.1), "GC/CT": (2.3, 5.4), "GT/CC": (5.2, 13.5),
+    "TC/AT": (1.2, 0.7), "TT/AC": (1.0, 0.7),
+    "AA/TC": (2.3, 4.6), "AC/TA": (5.3, 14.6), "CA/GC": (1.9, 3.7),
+    "CC/GA": (0.6, -0.6), "GA/CC": (5.2, 14.2), "GC/CA": (-0.7, -3.8),
+    "TA/AC": (3.4, 8.0), "TC/AA": (7.6, 20.2),
+    "AA/TA": (1.2, 1.7), "CA/GA": (-0.9, -4.2), "GA/CA": (-2.9, -9.8),
+    "TA/AA": (4.7, 12.9), "AC/TC": (0.0, -4.4), "CC/GC": (-1.5, -7.2),
+    "GC/CC": (3.6, 8.9), "TC/AC": (6.1, 16.4), "AG/TG": (-3.1, -9.5),
+    "CG/GG": (-4.9, -15.3), "GG/CG": (-6.0, -15.8), "TG/AG": (1.6, 3.6),
+    "AT/TT": (-2.7, -10.8), "CT/GT": (-5.0, -15.8), "GT/CT": (-2.2, -8.4),
+    "TT/AT": (0.2, -1.5),
+    "AI/TC": (-8.9, -25.5), "TI/AC": (-5.9, -17.4), "AC/TI": (-8.8, -25.4),
+    "TC/AI": (-4.9, -13.9), "CI/GC": (-5.4, -13.7), "GI/CC": (-6.8, -19.1),
+    "CC/GI": (-8.3, -23.8), "GC/CI": (-5.0, -12.6),
+    "AI/TA": (-8.3, -25.0), "TI/AA": (-3.4, -11.2), "AA/TI": (-0.7, -2.6),
+    "TA/AI": (-1.3, -4.6), "CI/GA": (2.6, 8.9), "GI/CA": (-7.8, -21.1),
+    "CA/GI": (-7.0, -20.0), "GA/CI": (-7.6, -20.2),
+    "AI/TT": (0.49, -0.7), "TI/AT": (-6.5, -22.0), "AT/TI": (-5.6, -18.7),
+    "TT/AI": (-0.8, -4.3), "CI/GT": (-1.0, -2.4), "GI/CT": (-3.5, -10.6),
+    "CT/GI": (0.1, -1.0), "GT/CI": (-4.3, -12.1),
+    "AI/TG": (-4.9, -15.8), "TI/AG": (-1.9, -8.5), "AG/TI": (0.1, -1.8),
+    "TG/AI": (1.0, 1.0), "CI/GG": (7.1, 21.3), "GI/CG": (-1.1, -3.2),
+    "CG/GI": (5.8, 16.9), "GG/CI": (-7.6, -22.0),
+    "AI/TI": (-3.3, -11.9), "TI/AI": (0.1, -2.3), "CI/GI": (1.3, 3.0),
+    "GI/CI": (-0.5, -1.3)}
+
+# Terminal mismatch table (DNA)
+# SantaLucia & Peyret (2001) Patent Application WO 01/94611
+DNA_TMM1 = {
+    "AA/TA": (-3.1, -7.8), "TA/AA": (-2.5, -6.3), "CA/GA": (-4.3, -10.7),
+    "GA/CA": (-8.0, -22.5),
+    "AC/TC": (-0.1, 0.5), "TC/AC": (-0.7, -1.3), "CC/GC": (-2.1, -5.1),
+    "GC/CC": (-3.9, -10.6),
+    "AG/TG": (-1.1, -2.1), "TG/AG": (-1.1, -2.7), "CG/GG": (-3.8, -9.5),
+    "GG/CG": (-0.7, -19.2),
+    "AT/TT": (-2.4, -6.5), "TT/AT": (-3.2, -8.9), "CT/GT": (-6.1, -16.9),
+    "GT/CT": (-7.4, -21.2),
+    "AA/TC": (-1.6, -4.0), "AC/TA": (-1.8, -3.8), "CA/GC": (-2.6, -5.9),
+    "CC/GA": (-2.7, -6.0), "GA/CC": (-5.0, -13.8), "GC/CA": (-3.2, -7.1),
+    "TA/AC": (-2.3, -5.9), "TC/AA": (-2.7, -7.0),
+    "AC/TT": (-0.9, -1.7), "AT/TC": (-2.3, -6.3), "CC/GT": (-3.2, -8.0),
+    "CT/GC": (-3.9, -10.6), "GC/CT": (-4.9, -13.5), "GT/CC": (-3.0, -7.8),
+    "TC/AT": (-2.5, -6.3), "TT/AC": (-0.7, -1.2),
+    "AA/TG": (-1.9, -4.4), "AG/TA": (-2.5, -5.9), "CA/GG": (-3.9, -9.6),
+    "CG/GA": (-6.0, -15.5), "GA/CG": (-4.3, -11.1), "GG/CA": (-4.6, -11.4),
+    "TA/AG": (-2.0, -4.7), "TG/AA": (-2.4, -5.8),
+    "AG/TT": (-3.2, -8.7), "AT/TG": (-3.5, -9.4), "CG/GT": (-3.8, -9.0),
+    "CT/GG": (-6.6, -18.7), "GG/CT": (-5.7, -15.9), "GT/CG": (-5.9, -16.1),
+    "TG/AT": (-3.9, -10.5), "TT/AG": (-3.6, -9.8)}
+
+# Dangling ends table (DNA)
+# Bommarito et al. (2000), Nucl Acids Res 28: 1929-1934
+DNA_DE1 = {
+    "AA/.T": (0.2, 2.3), "AC/.G": (-6.3, -17.1), "AG/.C": (-3.7, -10.0),
+    "AT/.A": (-2.9, -7.6), "CA/.T": (0.6, 3.3), "CC/.G": (-4.4, -12.6),
+    "CG/.C": (-4.0, -11.9), "CT/.A": (-4.1, -13.0), "GA/.T": (-1.1, -1.6),
+    "GC/.G": (-5.1, -14.0), "GG/.C": (-3.9, -10.9), "GT/.A": (-4.2, -15.0),
+    "TA/.T": (-6.9, -20.0), "TC/.G": (-4.0, -10.9), "TG/.C": (-4.9, -13.8),
+    "TT/.A": (-0.2, -0.5),
+    ".A/AT": (-0.7, -0.8), ".C/AG": (-2.1, -3.9), ".G/AC": (-5.9, -16.5),
+    ".T/AA": (-0.5, -1.1), ".A/CT": (4.4, 14.9), ".C/CG": (-0.2, -0.1),
+    ".G/CC": (-2.6, -7.4), ".T/CA": (4.7, 14.2), ".A/GT": (-1.6, -3.6),
+    ".C/GG": (-3.9, -11.2), ".G/GC": (-3.2, -10.4), ".T/GA": (-4.1, -13.1),
+    ".A/TT": (2.9, 10.4), ".C/TG": (-4.4, -13.1), ".G/TC": (-5.2, -15.0),
+    ".T/TA": (-3.8, -12.6)}
+
+# Dangling ends table (RNA)
+# Turner & Mathews (2010), Nucl Acids Res 38: D280-D282
+RNA_DE1 = {
+    ".T/AA": (-4.9, -13.2), ".T/CA": (-0.9, -1.3), ".T/GA": (-5.5, -15.1),
+    ".T/TA": (-2.3, -5.5),
+    ".G/AC": (-9.0, -23.5), ".G/CC": (-4.1, -10.6), ".G/GC": (-8.6, -22.2),
+    ".G/TC": (-7.5, -20.31),
+    ".C/AG": (-7.4, -20.3), ".C/CG": (-2.8, -7.7), ".C/GG": (-6.4, -16.4),
+    ".C/TG": (-3.6, -9.7),
+    ".T/AG": (-4.9, -13.2), ".T/CG": (-0.9, -1.3), ".T/GG": (-5.5, -15.1),
+    ".T/TG": (-2.3, -5.5),
+    ".A/AT": (-5.7, -16.1), ".A/CT": (-0.7, -1.9), ".A/GT": (-5.8, -16.4),
+    ".A/TT": (-2.2, -6.8),
+    ".G/AT": (-5.7, -16.1), ".G/CT": (-0.7, -1.9), ".G/GT": (-5.8, -16.4),
+    ".G/TT": (-2.2, -6.8),
+    "AT/.A": (-0.5, -0.6), "CT/.A": (6.9, 22.6), "GT/.A": (0.6, 2.6),
+    "TT/.A": (0.6, 2.6),
+    "AG/.C": (-1.6, -4.5), "CG/.C": (0.7, 3.2), "GG/.C": (-4.6, -14.8),
+    "TG/.C": (-0.4, -1.3),
+    "AC/.G": (-2.4, -6.1), "CC/.G": (3.3, 11.6), "GC/.G": (0.8, 3.2),
+    "TC/.G": (-1.4, -4.2),
+    "AT/.G": (-0.5, -0.6), "CT/.G": (6.9, 22.6), "GT/.G": (0.6, 2.6),
+    "TT/.G": (0.6, 2.6),
+    "AA/.T": (1.6, 6.1), "CA/.T": (2.2, 8.1), "GA/.T": (0.7, 3.5),
+    "TA/.T": (3.1, 10.6),
+    "AG/.T": (1.6, 6.1), "CG/.T": (2.2, 8.1), "GG/.T": (0.7, 3.5),
+    "TG/.T": (3.1, 10.6)}
+
+# Turn black code style on
+# fmt: on
+
+
+def make_table(oldtable=None, values=None):
+    """Return a table with thermodynamic parameters (as dictionary).
+
+    Arguments:
+     - oldtable: An existing dictionary with thermodynamic parameters.
+     - values: A dictionary with new or updated values.
+
+    E.g., to replace the initiation parameters in the Sugimoto '96 dataset with
+    the initiation parameters from Allawi & SantaLucia '97:
+
+    >>> from Bio.SeqUtils.MeltingTemp import make_table, DNA_NN2
+    >>> table = DNA_NN2                               # Sugimoto '96
+    >>> table['init_A/T']
+    (0, 0)
+    >>> newtable = make_table(oldtable=DNA_NN2, values={'init': (0, 0),
+    ...                       'init_A/T': (2.3, 4.1),
+    ...                       'init_G/C': (0.1, -2.8)})
+    >>> print("%0.1f, %0.1f" % newtable['init_A/T'])
+    2.3, 4.1
+
+    """
+    if oldtable is None:
+        table = {
+            "init": (0, 0),
+            "init_A/T": (0, 0),
+            "init_G/C": (0, 0),
+            "init_oneG/C": (0, 0),
+            "init_allA/T": (0, 0),
+            "init_5T/A": (0, 0),
+            "sym": (0, 0),
+            "AA/TT": (0, 0),
+            "AT/TA": (0, 0),
+            "TA/AT": (0, 0),
+            "CA/GT": (0, 0),
+            "GT/CA": (0, 0),
+            "CT/GA": (0, 0),
+            "GA/CT": (0, 0),
+            "CG/GC": (0, 0),
+            "GC/CG": (0, 0),
+            "GG/CC": (0, 0),
+        }
+    else:
+        table = oldtable.copy()
+    if values:
+        table.update(values)
+    return table
+
+
+def _check(seq, method):
+    """Return a sequence which fullfils the requirements of the given method (PRIVATE).
+
+    All Tm methods in this package require the sequence in uppercase format.
+    Most methods make use of the length of the sequence (directly or
+    indirectly), which can only be expressed as len(seq) if the sequence does
+    not contain whitespaces and other non-base characters. RNA sequences are
+    backtranscribed to DNA. This method is PRIVATE.
+
+    Arguments:
+     - seq: The sequence as given by the user (passed as string).
+     - method: Tm_Wallace, Tm_GC or Tm_NN.
+
+    >>> from Bio.SeqUtils import MeltingTemp as mt
+    >>> mt._check('10 ACGTTGCAAG tccatggtac', 'Tm_NN')
+    'ACGTTGCAAGTCCATGGTAC'
+
+    """
+    seq = "".join(seq.split()).upper()
+    seq = str(Seq.Seq(seq).back_transcribe())
+    if method == "Tm_Wallace":
+        return seq
+    if method == "Tm_GC":
+        baseset = (
+            "A",
+            "B",
+            "C",
+            "D",
+            "G",
+            "H",
+            "I",
+            "K",
+            "M",
+            "N",
+            "R",
+            "S",
+            "T",
+            "V",
+            "W",
+            "X",
+            "Y",
+        )
+    if method == "Tm_NN":
+        baseset = ("A", "C", "G", "T", "I")
+    seq = "".join([base for base in seq if base in baseset])
+    return seq
+
+
+def salt_correction(Na=0, K=0, Tris=0, Mg=0, dNTPs=0, method=1, seq=None):
+    """Calculate a term to correct Tm for salt ions.
+
+    Depending on the Tm calculation, the term will correct Tm or entropy. To
+    calculate corrected Tm values, different operations need to be applied:
+
+     - methods 1-4: Tm(new) = Tm(old) + corr
+     - method 5: deltaS(new) = deltaS(old) + corr
+     - methods 6+7: Tm(new) = 1/(1/Tm(old) + corr)
+
+    Arguments:
+     - Na, K, Tris, Mg, dNTPs: Millimolar concentration of the respective ion.
+       To have a simple 'salt correction', just pass Na. If any of K, Tris, Mg
+       and dNTPs is non-zero, a 'sodium-equivalent' concentration is calculated
+       according to von Ahsen et al. (2001, Clin Chem 47: 1956-1961):
+       [Na_eq] = [Na+] + [K+] + [Tris]/2 + 120*([Mg2+] - [dNTPs])^0.5
+       If [dNTPs] >= [Mg2+]: [Na_eq] = [Na+] + [K+] + [Tris]/2
+     - method: Which method to be applied. Methods 1-4 correct Tm, method 5
+       corrects deltaS, methods 6 and 7 correct 1/Tm. The methods are:
+
+       1. 16.6 x log[Na+]
+          (Schildkraut & Lifson (1965), Biopolymers 3: 195-208)
+       2. 16.6 x log([Na+]/(1.0 + 0.7*[Na+]))
+          (Wetmur (1991), Crit Rev Biochem Mol Biol 126: 227-259)
+       3. 12.5 x log[Na+]
+          (SantaLucia et al. (1996), Biochemistry 35: 3555-3562)
+       4. 11.7 x log[Na+]
+          (SantaLucia (1998), Proc Natl Acad Sci USA 95: 1460-1465)
+       5. Correction for deltaS: 0.368 x (N-1) x ln[Na+]
+          (SantaLucia (1998), Proc Natl Acad Sci USA 95: 1460-1465)
+       6. (4.29 x (%GC) - 3.95) x 1e-5 x ln[Na+] + 9.40e-6 x ln[Na+]^2
+          (Owczarzy et al. (2004), Biochemistry 43: 3537-3554)
+       7. Complex formula with decision tree and 7 empirical constants.
+          Mg2+ is corrected for dNTPs binding (if present)
+          (Owczarzy et al. (2008), Biochemistry 47: 5336-5353)
+
+    Examples
+    --------
+    >>> from Bio.SeqUtils import MeltingTemp as mt
+    >>> print('%0.2f' % mt.salt_correction(Na=50, method=1))
+    -21.60
+    >>> print('%0.2f' % mt.salt_correction(Na=50, method=2))
+    -21.85
+    >>> print('%0.2f' % mt.salt_correction(Na=100, Tris=20, method=2))
+    -16.45
+    >>> print('%0.2f' % mt.salt_correction(Na=100, Tris=20, Mg=1.5, method=2))
+    -10.99
+
+    """
+    if method in (5, 6, 7) and not seq:
+        raise ValueError(
+            "sequence is missing (is needed to calculate GC content or sequence length)."
+        )
+    if seq:
+        seq = str(seq)
+    corr = 0
+    if not method:
+        return corr
+    Mon = Na + K + Tris / 2.0  # Note: all these values are millimolar
+    mg = Mg * 1e-3  # Lowercase ions (mg, mon, dntps) are molar
+    # Na equivalent according to von Ahsen et al. (2001):
+    if sum((K, Mg, Tris, dNTPs)) > 0 and method != 7 and dNTPs < Mg:
+        # dNTPs bind Mg2+ strongly. If [dNTPs] is greater than or equal to
+        # [Mg2+], free Mg2+ is considered not to be relevant.
+        Mon += 120 * math.sqrt(Mg - dNTPs)
+    mon = Mon * 1e-3
+    # Note: math.log = ln(), math.log10 = log()
+    if method in range(1, 7) and not mon:
+        raise ValueError(
+            "Total ion concentration of zero is not allowed in this method."
+        )
+    if method == 1:
+        corr = 16.6 * math.log10(mon)
+    if method == 2:
+        corr = 16.6 * math.log10((mon) / (1.0 + 0.7 * (mon)))
+    if method == 3:
+        corr = 12.5 * math.log10(mon)
+    if method == 4:
+        corr = 11.7 * math.log10(mon)
+    if method == 5:
+        corr = 0.368 * (len(seq) - 1) * math.log(mon)
+    if method == 6:
+        corr = (
+            (4.29 * SeqUtils.GC(seq) / 100 - 3.95) * 1e-5 * math.log(mon)
+        ) + 9.40e-6 * math.log(mon) ** 2
+    # Turn black code style off
+    # fmt: off
+    if method == 7:
+        a, b, c, d = 3.92, -0.911, 6.26, 1.42
+        e, f, g = -48.2, 52.5, 8.31
+        if dNTPs > 0:
+            dntps = dNTPs * 1e-3
+            ka = 3e4  # Dissociation constant for Mg:dNTP
+            # Free Mg2+ calculation:
+            mg = (-(ka * dntps - ka * mg + 1.0)
+                  + math.sqrt((ka * dntps - ka * mg + 1.0) ** 2
+                              + 4.0 * ka * mg)) / (2.0 * ka)
+        if Mon > 0:
+            R = math.sqrt(mg) / mon
+            if R < 0.22:
+                corr = (4.29 * SeqUtils.GC(seq) / 100 - 3.95) * \
+                    1e-5 * math.log(mon) + 9.40e-6 * math.log(mon) ** 2
+                return corr
+            elif R < 6.0:
+                a = 3.92 * (0.843 - 0.352 * math.sqrt(mon) * math.log(mon))
+                d = 1.42 * (1.279 - 4.03e-3 * math.log(mon)
+                            - 8.03e-3 * math.log(mon) ** 2)
+                g = 8.31 * (0.486 - 0.258 * math.log(mon)
+                            + 5.25e-3 * math.log(mon) ** 3)
+        corr = (a + b * math.log(mg) + (SeqUtils.GC(seq) / 100)
+                * (c + d * math.log(mg)) + (1 / (2.0 * (len(seq) - 1)))
+                * (e + f * math.log(mg) + g * math.log(mg) ** 2)) * 1e-5
+    # Turn black code style on
+    # fmt: on
+    if method > 7:
+        raise ValueError("Allowed values for parameter 'method' are 1-7.")
+    return corr
+
+
+def chem_correction(
+    melting_temp, DMSO=0, fmd=0, DMSOfactor=0.75, fmdfactor=0.65, fmdmethod=1, GC=None
+):
+    """Correct a given Tm for DMSO and formamide.
+
+    Please note that these corrections are +/- rough approximations.
+
+    Arguments:
+     - melting_temp: Melting temperature.
+     - DMSO: Percent DMSO.
+     - fmd: Formamide concentration in %(fmdmethod=1) or molar (fmdmethod=2).
+     - DMSOfactor: How much Tm decreases per percent DMSO. Default=0.75
+       (von Ahsen et al. 2001). Other published values are 0.5, 0.6 and 0.675.
+     - fmdfactor: How much Tm decreases per percent formamide.
+       Default=0.65. Several papers report factors between 0.6 and 0.72.
+     - fmdmethod:
+
+         1. Tm = Tm - factor(%formamide) (Default)
+         2. Tm = Tm + (0.453(f(GC)) - 2.88) x [formamide]
+
+       Here f(GC) is fraction of GC.
+       Note (again) that in fmdmethod=1 formamide concentration is given in %,
+       while in fmdmethod=2 it is given in molar.
+     - GC: GC content in percent.
+
+    Examples:
+        >>> from Bio.SeqUtils import MeltingTemp as mt
+        >>> mt.chem_correction(70)
+        70
+        >>> print('%0.2f' % mt.chem_correction(70, DMSO=3))
+        67.75
+        >>> print('%0.2f' % mt.chem_correction(70, fmd=5))
+        66.75
+        >>> print('%0.2f' % mt.chem_correction(70, fmdmethod=2, fmd=1.25,
+        ...                                    GC=50))
+        66.68
+
+    """
+    if DMSO:
+        melting_temp -= DMSOfactor * DMSO
+    if fmd:
+        # McConaughy et al. (1969), Biochemistry 8: 3289-3295
+        if fmdmethod == 1:
+            # Note: Here fmd is given in percent
+            melting_temp -= fmdfactor * fmd
+        # Blake & Delcourt (1996), Nucl Acids Res 11: 2095-2103
+        if fmdmethod == 2:
+            if GC is None or GC < 0:
+                raise ValueError("'GC' is missing or negative")
+            # Note: Here fmd is given in molar
+            melting_temp += (0.453 * (GC / 100.0) - 2.88) * fmd
+        if fmdmethod not in (1, 2):
+            raise ValueError("'fmdmethod' must be 1 or 2")
+    return melting_temp
+
+
+def Tm_Wallace(seq, check=True, strict=True):
+    """Calculate and return the Tm using the 'Wallace rule'.
+
+    Tm = 4 degC * (G + C) + 2 degC * (A+T)
+
+    The Wallace rule (Thein & Wallace 1986, in Human genetic diseases: a
+    practical approach, 33-50) is often used as a rule of thumb for
+    approximate Tm calculations for primers of 14 to 20 nt length.
+
+    Non-DNA characters (e.g., E, F, J, !, 1, etc.) are ignored by this method.
+
+    Examples:
+        >>> from Bio.SeqUtils import MeltingTemp as mt
+        >>> mt.Tm_Wallace('ACGTTGCAATGCCGTA')
+        48.0
+        >>> mt.Tm_Wallace('ACGT TGCA ATGC CGTA')
+        48.0
+        >>> mt.Tm_Wallace('1ACGT2TGCA3ATGC4CGTA')
+        48.0
+
+    """
+    seq = str(seq)
+    if check:
+        seq = _check(seq, "Tm_Wallace")
+
+    melting_temp = 2 * (sum(map(seq.count, ("A", "T", "W")))) + 4 * (
+        sum(map(seq.count, ("C", "G", "S")))
+    )
+
+    # Intermediate values for ambiguous positions:
+    tmp = (
+        3 * (sum(map(seq.count, ("K", "M", "N", "R", "Y"))))
+        + 10 / 3.0 * (sum(map(seq.count, ("B", "V"))))
+        + 8 / 3.0 * (sum(map(seq.count, ("D", "H"))))
+    )
+    if strict and tmp:
+        raise ValueError(
+            "ambiguous bases B, D, H, K, M, N, R, V, Y not allowed when strict=True"
+        )
+    else:
+        melting_temp += tmp
+    return melting_temp
+
+
+def Tm_GC(
+    seq,
+    check=True,
+    strict=True,
+    valueset=7,
+    userset=None,
+    Na=50,
+    K=0,
+    Tris=0,
+    Mg=0,
+    dNTPs=0,
+    saltcorr=0,
+    mismatch=True,
+):
+    """Return the Tm using empirical formulas based on GC content.
+
+    General format: Tm = A + B(%GC) - C/N + salt correction - D(%mismatch)
+
+    A, B, C, D: empirical constants, N: primer length
+    D (amount of decrease in Tm per % mismatch) is often 1, but sometimes other
+    values have been used (0.6-1.5). Use 'X' to indicate the mismatch position
+    in the sequence. Note that this mismatch correction is a rough estimate.
+
+    >>> from Bio.SeqUtils import MeltingTemp as mt
+    >>> print("%0.2f" % mt.Tm_GC('CTGCTGATXGCACGAGGTTATGG', valueset=2))
+    69.20
+
+    Arguments:
+     - valueset: A few often cited variants are included:
+
+        1. Tm = 69.3 + 0.41(%GC) - 650/N
+           (Marmur & Doty 1962, J Mol Biol 5: 109-118; Chester & Marshak 1993),
+           Anal Biochem 209: 284-290)
+        2. Tm = 81.5 + 0.41(%GC) - 675/N - %mismatch
+           'QuikChange' formula. Recommended (by the manufacturer) for the
+           design of primers for QuikChange mutagenesis.
+        3. Tm = 81.5 + 0.41(%GC) - 675/N + 16.6 x log[Na+]
+           (Marmur & Doty 1962, J Mol Biol 5: 109-118; Schildkraut & Lifson
+           1965, Biopolymers 3: 195-208)
+        4. Tm = 81.5 + 0.41(%GC) - 500/N + 16.6 x log([Na+]/(1.0 + 0.7 x
+           [Na+])) - %mismatch
+           (Wetmur 1991, Crit Rev Biochem Mol Biol 126: 227-259). This is the
+           standard formula in approximative mode of MELTING 4.3.
+        5. Tm = 78 + 0.7(%GC) - 500/N + 16.6 x log([Na+]/(1.0 + 0.7 x [Na+]))
+           - %mismatch
+           (Wetmur 1991, Crit Rev Biochem Mol Biol 126: 227-259). For RNA.
+        6. Tm = 67 + 0.8(%GC) - 500/N + 16.6 x log([Na+]/(1.0 + 0.7 x [Na+]))
+           - %mismatch
+           (Wetmur 1991, Crit Rev Biochem Mol Biol 126: 227-259). For RNA/DNA
+           hybrids.
+        7. Tm = 81.5 + 0.41(%GC) - 600/N + 16.6 x log[Na+]
+           Used by Primer3Plus to calculate the product Tm. Default set.
+        8. Tm = 77.1 + 0.41(%GC) - 528/N + 11.7 x log[Na+]
+           (von Ahsen et al. 2001, Clin Chem 47: 1956-1961). Recommended 'as a
+           tradeoff between accuracy and ease of use'.
+
+     - userset: Tuple of four values for A, B, C, and D. Usersets override
+       valuesets.
+     - Na, K, Tris, Mg, dNTPs: Concentration of the respective ions [mM]. If
+       any of K, Tris, Mg and dNTPs is non-zero, a 'sodium-equivalent'
+       concentration is calculated and used for salt correction (von Ahsen et
+       al., 2001).
+     - saltcorr: Type of salt correction (see method salt_correction).
+       Default=0. 0 or None means no salt correction.
+     - mismatch: If 'True' (default) every 'X' in the sequence is counted as
+       mismatch.
+
+    """
+    if saltcorr == 5:
+        raise ValueError("salt-correction method 5 not applicable to Tm_GC")
+    seq = str(seq)
+    if check:
+        seq = _check(seq, "Tm_GC")
+    percent_gc = SeqUtils.GC(seq)
+    # Ambiguous bases: add 0.5, 0.67 or 0.33% depending on G+C probability:
+    tmp = (
+        sum(map(seq.count, ("K", "M", "N", "R", "Y"))) * 50.0 / len(seq)
+        + sum(map(seq.count, ("B", "V"))) * 66.67 / len(seq)
+        + sum(map(seq.count, ("D", "H"))) * 33.33 / len(seq)
+    )
+    if strict and tmp:
+        raise ValueError(
+            "ambiguous bases B, D, H, K, M, N, R, V, Y not allowed when 'strict=True'"
+        )
+    else:
+        percent_gc += tmp
+    if userset:
+        A, B, C, D = userset
+    else:
+        if valueset == 1:
+            A, B, C, D = (69.3, 0.41, 650, 1)
+            saltcorr = 0
+        if valueset == 2:
+            A, B, C, D = (81.5, 0.41, 675, 1)
+            saltcorr = 0
+        if valueset == 3:
+            A, B, C, D = (81.5, 0.41, 675, 1)
+            saltcorr = 2
+        if valueset == 4:
+            A, B, C, D = (81.5, 0.41, 500, 1)
+            saltcorr = 3
+        if valueset == 5:
+            A, B, C, D = (78.0, 0.7, 500, 1)
+            saltcorr = 3
+        if valueset == 6:
+            A, B, C, D = (67.0, 0.8, 500, 1)
+            saltcorr = 3
+        if valueset == 7:
+            A, B, C, D = (81.5, 0.41, 600, 1)
+            saltcorr = 2
+        if valueset == 8:
+            A, B, C, D = (77.1, 0.41, 528, 1)
+            saltcorr = 4
+    if valueset > 8:
+        raise ValueError("allowed values for parameter 'valueset' are 0-8.")
+
+    melting_temp = A + B * percent_gc - C / (len(seq) * 1.0)
+    if saltcorr:
+        melting_temp += salt_correction(
+            Na=Na, K=K, Tris=Tris, Mg=Mg, dNTPs=dNTPs, seq=seq, method=saltcorr
+        )
+    if mismatch:
+        melting_temp -= D * (seq.count("X") * 100.0 / len(seq))
+    return melting_temp
+
+
+def _key_error(neighbors, strict):
+    """Throw an error or a warning if there is no data for the neighbors (PRIVATE)."""
+    # We haven't found the key in the tables
+    if strict:
+        raise ValueError("no thermodynamic data for neighbors %r available" % neighbors)
+    else:
+        warnings.warn(
+            "no thermodynamic data for neighbors %r available. "
+            "Calculation will be wrong" % neighbors,
+            BiopythonWarning,
+        )
+
+
+def Tm_NN(
+    seq,
+    check=True,
+    strict=True,
+    c_seq=None,
+    shift=0,
+    nn_table=None,
+    tmm_table=None,
+    imm_table=None,
+    de_table=None,
+    dnac1=25,
+    dnac2=25,
+    selfcomp=False,
+    Na=50,
+    K=0,
+    Tris=0,
+    Mg=0,
+    dNTPs=0,
+    saltcorr=5,
+):
+    """Return the Tm using nearest neighbor thermodynamics.
+
+    Arguments:
+     - seq: The primer/probe sequence as string or Biopython sequence object.
+       For RNA/DNA hybridizations seq must be the RNA sequence.
+     - c_seq: Complementary sequence. The sequence of the template/target in
+       3'->5' direction. c_seq is necessary for mismatch correction and
+       dangling-ends correction. Both corrections will automatically be
+       applied if mismatches or dangling ends are present. Default=None.
+     - shift: Shift of the primer/probe sequence on the template/target
+       sequence, e.g.::
+
+                           shift=0       shift=1        shift= -1
+        Primer (seq):      5' ATGC...    5'  ATGC...    5' ATGC...
+        Template (c_seq):  3' TACG...    3' CTACG...    3'  ACG...
+
+       The shift parameter is necessary to align seq and c_seq if they have
+       different lengths or if they should have dangling ends. Default=0
+     - nn_table: Thermodynamic NN values, eight tables are implemented:
+       For DNA/DNA hybridizations:
+
+        - DNA_NN1: values from Breslauer et al. (1986)
+        - DNA_NN2: values from Sugimoto et al. (1996)
+        - DNA_NN3: values from Allawi & SantaLucia (1997) (default)
+        - DNA_NN4: values from SantaLucia & Hicks (2004)
+
+       For RNA/RNA hybridizations:
+
+        - RNA_NN1: values from Freier et al. (1986)
+        - RNA_NN2: values from Xia et al. (1998)
+        - RNA_NN3: values from Chen et al. (2012)
+
+       For RNA/DNA hybridizations:
+
+        - R_DNA_NN1: values from Sugimoto et al. (1995)
+          Note that ``seq`` must be the RNA sequence.
+
+       Use the module's make_table function to make a new table or to update
+       one of the implemented tables.
+     - tmm_table: Thermodynamic values for terminal mismatches.
+       Default: DNA_TMM1 (SantaLucia & Peyret, 2001)
+     - imm_table: Thermodynamic values for internal mismatches, may include
+       inosine mismatches. Default: DNA_IMM1 (Allawi & SantaLucia, 1997-1998;
+       Peyret et al., 1999; Watkins & SantaLucia, 2005)
+     - de_table: Thermodynamic values for dangling ends:
+
+        - DNA_DE1: for DNA. Values from Bommarito et al. (2000) (default)
+        - RNA_DE1: for RNA. Values from Turner & Mathews (2010)
+
+     - dnac1: Concentration of the higher concentrated strand [nM]. Typically
+       this will be the primer (for PCR) or the probe. Default=25.
+     - dnac2: Concentration of the lower concentrated strand [nM]. In PCR this
+       is the template strand, whose concentration is typically very low and
+       may be ignored (dnac2=0). In oligo/oligo hybridization experiments,
+       dnac1 equals dnac2. Default=25.
+       MELTING and Primer3Plus use k = [Oligo(Total)]/4 by default. To mimic
+       this behaviour, you have to divide [Oligo(Total)] by 2 and assign this
+       concentration to dnac1 and dnac2. E.g., Total oligo concentration of
+       50 nM in Primer3Plus means dnac1=25, dnac2=25.
+     - selfcomp: Is the sequence self-complementary? Default=False. If 'True',
+       the primer is assumed to bind to itself, thus dnac2 is not considered.
+     - Na, K, Tris, Mg, dNTPs: See method 'Tm_GC' for details. Defaults: Na=50,
+       K=0, Tris=0, Mg=0, dNTPs=0.
+     - saltcorr: See method 'Tm_GC'. Default=5. 0 means no salt correction.
+
+    """
+    # Set defaults
+    if not nn_table:
+        nn_table = DNA_NN3
+    if not tmm_table:
+        tmm_table = DNA_TMM1
+    if not imm_table:
+        imm_table = DNA_IMM1
+    if not de_table:
+        de_table = DNA_DE1
+
+    seq = str(seq)
+    if not c_seq:
+        # c_seq must be provided by user if dangling ends or mismatches should
+        # be taken into account. Otherwise take perfect complement.
+        c_seq = Seq.Seq(seq).complement()
+    c_seq = str(c_seq)
+    if check:
+        seq = _check(seq, "Tm_NN")
+        c_seq = _check(c_seq, "Tm_NN")
+    tmp_seq = seq
+    tmp_cseq = c_seq
+    delta_h = 0
+    delta_s = 0
+    d_h = 0  # Names for indexes
+    d_s = 1  # 0 and 1
+
+    # Dangling ends?
+    if shift or len(seq) != len(c_seq):
+        # Align both sequences using the shift parameter
+        if shift > 0:
+            tmp_seq = "." * shift + seq
+        if shift < 0:
+            tmp_cseq = "." * abs(shift) + c_seq
+        if len(tmp_cseq) > len(tmp_seq):
+            tmp_seq += (len(tmp_cseq) - len(tmp_seq)) * "."
+        if len(tmp_cseq) < len(tmp_seq):
+            tmp_cseq += (len(tmp_seq) - len(tmp_cseq)) * "."
+        # Remove 'over-dangling' ends
+        while tmp_seq.startswith("..") or tmp_cseq.startswith(".."):
+            tmp_seq = tmp_seq[1:]
+            tmp_cseq = tmp_cseq[1:]
+        while tmp_seq.endswith("..") or tmp_cseq.endswith(".."):
+            tmp_seq = tmp_seq[:-1]
+            tmp_cseq = tmp_cseq[:-1]
+        # Now for the dangling ends
+        if tmp_seq.startswith(".") or tmp_cseq.startswith("."):
+            left_de = tmp_seq[:2] + "/" + tmp_cseq[:2]
+            try:
+                delta_h += de_table[left_de][d_h]
+                delta_s += de_table[left_de][d_s]
+            except KeyError:
+                _key_error(left_de, strict)
+            tmp_seq = tmp_seq[1:]
+            tmp_cseq = tmp_cseq[1:]
+        if tmp_seq.endswith(".") or tmp_cseq.endswith("."):
+            right_de = tmp_cseq[-2:][::-1] + "/" + tmp_seq[-2:][::-1]
+            try:
+                delta_h += de_table[right_de][d_h]
+                delta_s += de_table[right_de][d_s]
+            except KeyError:
+                _key_error(right_de, strict)
+            tmp_seq = tmp_seq[:-1]
+            tmp_cseq = tmp_cseq[:-1]
+
+    # Now for terminal mismatches
+    left_tmm = tmp_cseq[:2][::-1] + "/" + tmp_seq[:2][::-1]
+    if left_tmm in tmm_table:
+        delta_h += tmm_table[left_tmm][d_h]
+        delta_s += tmm_table[left_tmm][d_s]
+        tmp_seq = tmp_seq[1:]
+        tmp_cseq = tmp_cseq[1:]
+    right_tmm = tmp_seq[-2:] + "/" + tmp_cseq[-2:]
+    if right_tmm in tmm_table:
+        delta_h += tmm_table[right_tmm][d_h]
+        delta_s += tmm_table[right_tmm][d_s]
+        tmp_seq = tmp_seq[:-1]
+        tmp_cseq = tmp_cseq[:-1]
+
+    # Now everything 'unusual' at the ends is handled and removed and we can
+    # look at the initiation.
+    # One or several of the following initiation types may apply:
+
+    # Type: General initiation value
+    delta_h += nn_table["init"][d_h]
+    delta_s += nn_table["init"][d_s]
+
+    # Type: Duplex with no (allA/T) or at least one (oneG/C) GC pair
+    if SeqUtils.GC(seq) == 0:
+        delta_h += nn_table["init_allA/T"][d_h]
+        delta_s += nn_table["init_allA/T"][d_s]
+    else:
+        delta_h += nn_table["init_oneG/C"][d_h]
+        delta_s += nn_table["init_oneG/C"][d_s]
+
+    # Type: Penalty if 5' end is T
+    if seq.startswith("T"):
+        delta_h += nn_table["init_5T/A"][d_h]
+        delta_s += nn_table["init_5T/A"][d_s]
+    if seq.endswith("A"):
+        delta_h += nn_table["init_5T/A"][d_h]
+        delta_s += nn_table["init_5T/A"][d_s]
+
+    # Type: Different values for G/C or A/T terminal basepairs
+    ends = seq[0] + seq[-1]
+    AT = ends.count("A") + ends.count("T")
+    GC = ends.count("G") + ends.count("C")
+    delta_h += nn_table["init_A/T"][d_h] * AT
+    delta_s += nn_table["init_A/T"][d_s] * AT
+    delta_h += nn_table["init_G/C"][d_h] * GC
+    delta_s += nn_table["init_G/C"][d_s] * GC
+
+    # Finally, the 'zipping'
+    for basenumber in range(len(tmp_seq) - 1):
+        neighbors = (
+            tmp_seq[basenumber : basenumber + 2]
+            + "/"
+            + tmp_cseq[basenumber : basenumber + 2]
+        )
+        if neighbors in imm_table:
+            delta_h += imm_table[neighbors][d_h]
+            delta_s += imm_table[neighbors][d_s]
+        elif neighbors[::-1] in imm_table:
+            delta_h += imm_table[neighbors[::-1]][d_h]
+            delta_s += imm_table[neighbors[::-1]][d_s]
+        elif neighbors in nn_table:
+            delta_h += nn_table[neighbors][d_h]
+            delta_s += nn_table[neighbors][d_s]
+        elif neighbors[::-1] in nn_table:
+            delta_h += nn_table[neighbors[::-1]][d_h]
+            delta_s += nn_table[neighbors[::-1]][d_s]
+        else:
+            # We haven't found the key...
+            _key_error(neighbors, strict)
+
+    k = (dnac1 - (dnac2 / 2.0)) * 1e-9
+    if selfcomp:
+        k = dnac1 * 1e-9
+        delta_h += nn_table["sym"][d_h]
+        delta_s += nn_table["sym"][d_s]
+    R = 1.987  # universal gas constant in cal/(K * mol)
+    if saltcorr:
+        corr = salt_correction(
+            Na=Na, K=K, Tris=Tris, Mg=Mg, dNTPs=dNTPs, method=saltcorr, seq=seq
+        )
+    if saltcorr == 5:
+        delta_s += corr
+    melting_temp = (1000 * delta_h) / (delta_s + (R * (math.log(k)))) - 273.15
+    if saltcorr in (1, 2, 3, 4):
+        melting_temp += corr
+    if saltcorr in (6, 7):
+        # Tm = 1/(1/Tm + corr)
+        melting_temp = 1 / (1 / (melting_temp + 273.15) + corr) - 273.15
+
+    return melting_temp
+
+
+def Tm_staluc(s, dnac=50, saltc=50, rna=0):
+    """Return DNA/DNA Tm using nearest neighbor thermodynamics (OBSOLETE).
+
+    This method may be deprecated in the future. Use Tm_NN instead. Tm_NN
+    with default values gives the same result as Tm_staluc.
+
+    s is the sequence as string or Seq object
+    dnac is DNA concentration [nM]
+    saltc is salt concentration [mM].
+    rna=0 is for DNA/DNA (default), use 1 for RNA/RNA hybridisation.
+
+    For DNA/DNA, see Allawi & SantaLucia (1997), Biochemistry 36: 10581-10594
+    For RNA/RNA, see Xia et al (1998), Biochemistry 37: 14719-14735
+
+    Examples
+    --------
+    >>> print("%0.2f" % Tm_staluc('CAGTCAGTACGTACGTGTACTGCCGTA'))
+    59.87
+    >>> print("%0.2f" % Tm_staluc('CAGTCAGTACGTACGTGTACTGCCGTA', rna=True))
+    77.90
+
+    You can also use a Seq object instead of a string,
+
+    >>> from Bio.Seq import Seq
+    >>> s = Seq('CAGTCAGTACGTACGTGTACTGCCGTA')
+    >>> print("%0.2f" % Tm_staluc(s))
+    59.87
+    >>> print("%0.2f" % Tm_staluc(s, rna=True))
+    77.90
+
+    """
+    # The original method was by Sebastian Bassi. It is now superseded by
+    # Tm_NN.
+
+    warnings.warn(
+        "Tm_staluc is deprecated; please use Tm_NN instead.",
+        BiopythonDeprecationWarning,
+    )
+    if not rna:
+        return Tm_NN(s, dnac1=dnac / 2.0, dnac2=dnac / 2.0, Na=saltc)
+    elif rna == 1:
+        return Tm_NN(s, dnac1=dnac / 2.0, dnac2=dnac / 2.0, Na=saltc, nn_table=RNA_NN2)
+    else:
+        raise ValueError(f"rna={rna} not supported")
+
+
+if __name__ == "__main__":
+    from Bio._utils import run_doctest
+
+    run_doctest()
diff --git a/code/lib/Bio/SeqUtils/ProtParam.py b/code/lib/Bio/SeqUtils/ProtParam.py
new file mode 100644
index 0000000..937e3c5
--- /dev/null
+++ b/code/lib/Bio/SeqUtils/ProtParam.py
@@ -0,0 +1,356 @@
+# Copyright 2003 Yair Benita.  All rights reserved.
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Simple protein analysis.
+
+Examples
+--------
+>>> from Bio.SeqUtils.ProtParam import ProteinAnalysis
+>>> X = ProteinAnalysis("MAEGEITTFTALTEKFNLPPGNYKKPKLLYCSNGGHFLRILPDGTVDGT"
+...                     "RDRSDQHIQLQLSAESVGEVYIKSTETGQYLAMDTSGLLYGSQTPSEEC"
+...                     "LFLERLEENHYNTYTSKKHAEKNWFVGLKKNGSCKRGPRTHYGQKAILF"
+...                     "LPLPV")
+>>> print(X.count_amino_acids()['A'])
+6
+>>> print(X.count_amino_acids()['E'])
+12
+>>> print("%0.2f" % X.get_amino_acids_percent()['A'])
+0.04
+>>> print("%0.2f" % X.get_amino_acids_percent()['L'])
+0.12
+>>> print("%0.2f" % X.molecular_weight())
+17103.16
+>>> print("%0.2f" % X.aromaticity())
+0.10
+>>> print("%0.2f" % X.instability_index())
+41.98
+>>> print("%0.2f" % X.isoelectric_point())
+7.72
+>>> sec_struc = X.secondary_structure_fraction()  # [helix, turn, sheet]
+>>> print("%0.2f" % sec_struc[0])  # helix
+0.28
+>>> epsilon_prot = X.molar_extinction_coefficient()  # [reduced, oxidized]
+>>> print(epsilon_prot[0])  # with reduced cysteines
+17420
+>>> print(epsilon_prot[1])  # with disulfid bridges
+17545
+
+Other public methods are:
+ - gravy
+ - protein_scale
+ - flexibility
+ - charge_at_pH
+
+"""
+
+
+import sys
+from Bio.SeqUtils import ProtParamData  # Local
+from Bio.SeqUtils import IsoelectricPoint  # Local
+from Bio.Seq import Seq
+from Bio.Data import IUPACData
+from Bio.SeqUtils import molecular_weight
+
+
+class ProteinAnalysis:
+    """Class containing methods for protein analysis.
+
+    The constructor takes two arguments.
+    The first is the protein sequence as a string, which is then converted to a
+    sequence object using the Bio.Seq module. This is done just to make sure
+    the sequence is a protein sequence and not anything else.
+
+    The second argument is optional. If set to True, the weight of the amino
+    acids will be calculated using their monoisotopic mass (the weight of the
+    most abundant isotopes for each element), instead of the average molecular
+    mass (the averaged weight of all stable isotopes for each element).
+    If set to False (the default value) or left out, the IUPAC average
+    molecular mass will be used for the calculation.
+
+    """
+
+    def __init__(self, prot_sequence, monoisotopic=False):
+        """Initialize the class."""
+        if prot_sequence.islower():
+            self.sequence = Seq(prot_sequence.upper())
+        else:
+            self.sequence = Seq(prot_sequence)
+        self.amino_acids_content = None
+        self.amino_acids_percent = None
+        self.length = len(self.sequence)
+        self.monoisotopic = monoisotopic
+
+    def count_amino_acids(self):
+        """Count standard amino acids, return a dict.
+
+        Counts the number of times each amino acid is in the protein
+        sequence. Returns a dictionary {AminoAcid:Number}.
+
+        The return value is cached in self.amino_acids_content.
+        It is not recalculated upon subsequent calls.
+        """
+        if self.amino_acids_content is None:
+            prot_dic = {k: 0 for k in IUPACData.protein_letters}
+            for aa in prot_dic:
+                prot_dic[aa] = self.sequence.count(aa)
+
+            self.amino_acids_content = prot_dic
+
+        return self.amino_acids_content
+
+    def get_amino_acids_percent(self):
+        """Calculate the amino acid content in percentages.
+
+        The same as count_amino_acids, but expresses each count as a fraction
+        of the entire sequence length. Returns a dictionary of
+        {AminoAcid:percentage}.
+
+        The return value is cached in self.amino_acids_percent.
+
+        input is the dictionary self.amino_acids_content.
+        output is a dictionary with amino acids as keys.
+        """
+        if self.amino_acids_percent is None:
+            aa_counts = self.count_amino_acids()
+
+            percentages = {}
+            for aa in aa_counts:
+                percentages[aa] = aa_counts[aa] / float(self.length)
+
+            self.amino_acids_percent = percentages
+
+        return self.amino_acids_percent
+
+    def molecular_weight(self):
+        """Calculate MW from Protein sequence."""
+        return molecular_weight(
+            self.sequence, seq_type="protein", monoisotopic=self.monoisotopic
+        )
+
+    def aromaticity(self):
+        """Calculate the aromaticity according to Lobry, 1994.
+
+        Calculates the aromaticity value of a protein according to Lobry, 1994.
+        It is simply the relative frequency of Phe+Trp+Tyr.
+        """
+        aromatic_aas = "YWF"
+        aa_percentages = self.get_amino_acids_percent()
+
+        aromaticity = sum(aa_percentages[aa] for aa in aromatic_aas)
+
+        return aromaticity
+
+    def instability_index(self):
+        """Calculate the instability index according to Guruprasad et al 1990.
+
+        Implementation of the method of Guruprasad et al. 1990 to test a
+        protein for stability. Any value above 40 means the protein is
+        unstable (has a short half-life).
+
+        See: Guruprasad K., Reddy B.V.B., Pandit M.W.
+        Protein Engineering 4:155-161(1990).
+        """
+        index = ProtParamData.DIWV
+        score = 0.0
+
+        for i in range(self.length - 1):
+            this, nxt = self.sequence[i : i + 2]
+            dipeptide_value = index[this][nxt]
+            score += dipeptide_value
+
+        return (10.0 / self.length) * score
+
+    def flexibility(self):
+        """Calculate the flexibility according to Vihinen, 1994.
+
+        No argument to change window size because parameters are specific for
+        a window=9. The parameters used are optimized for determining the
+        flexibility.
+        """
+        flexibilities = ProtParamData.Flex
+        window_size = 9
+        weights = [0.25, 0.4375, 0.625, 0.8125, 1]
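+        # Note (added): only the first four weights are applied, once to each
+        # window half; together with the centre residue's implicit weight of 1
+        # the total is 2 * (0.25 + 0.4375 + 0.625 + 0.8125) + 1 = 5.25, the
+        # divisor used below.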
+        scores = []
+
+        for i in range(self.length - window_size):
+            subsequence = self.sequence[i : i + window_size]
+            score = 0.0
+
+            for j in range(window_size // 2):
+                front = subsequence[j]
+                back = subsequence[window_size - j - 1]
+                score += (flexibilities[front] + flexibilities[back]) * weights[j]
+
+            middle = subsequence[window_size // 2 + 1]
+            score += flexibilities[middle]
+
+            scores.append(score / 5.25)
+
+        return scores
+
+    def gravy(self, scale="KyteDoolitle"):
+        """Calculate the GRAVY (Grand Average of Hydropathy) according to Kyte and Doolitle, 1982.
+
+        Utilizes the given Hydrophobicity scale, by default uses the original
+        proposed by Kyte and Doolittle (KyteDoolitle). Other options are:
+        Aboderin, AbrahamLeo, Argos, BlackMould, BullBreese, Casari, Cid,
+        Cowan3.4, Cowan7.5, Eisenberg, Engelman, Fasman, Fauchere, GoldSack,
+        Guy, Jones, Juretic, Kidera, Miyazawa, Parker, Ponnuswamy, Rose,
+        Roseman, Sweet, Tanford, Wilson and Zimmerman.
+
+        New scales can be added in ProtParamData.
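+
+        For example, with the default Kyte-Doolittle scale (A=1.8, G=-0.4):
+
+        >>> from Bio.SeqUtils.ProtParam import ProteinAnalysis
+        >>> round(ProteinAnalysis("AG").gravy(), 2)
+        0.7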
+        """
+        selected_scale = ProtParamData.gravy_scales.get(scale, -1)
+
+        if selected_scale == -1:
+            raise ValueError(f"scale: {scale} not known")
+
+        total_gravy = sum(selected_scale[aa] for aa in self.sequence)
+
+        return total_gravy / self.length
+
+    def _weight_list(self, window, edge):
+        """Make list of relative weight of window edges (PRIVATE).
+
+        The relative weight of window edges are compared to the window
+        center. The weights are linear. It actually generates half a list.
+        For a window of size 9 and edge 0.4 you get a list of
+        [0.4, 0.55, 0.7, 0.85].
+        """
+        unit = 2 * (1.0 - edge) / (window - 1)
+        weights = [0.0] * (window // 2)
+
+        for i in range(window // 2):
+            weights[i] = edge + unit * i
+
+        return weights
+
+    def protein_scale(self, param_dict, window, edge=1.0):
+        """Compute a profile by any amino acid scale.
+
+        An amino acid scale is defined by a numerical value assigned to each
+        type of amino acid. The most frequently used scales are the
+        hydrophobicity or hydrophilicity scales and the secondary structure
+        conformational parameters scales, but many other scales exist which
+        are based on different chemical and physical properties of the
+        amino acids.  You can set several parameters that control the
+        computation of a scale profile, such as the window size and the window
+        edge relative weight value.
+
+        WindowSize: The window size is the length of the interval to use for
+        the profile computation. For a window size n, we use the i-(n-1)/2
+        neighboring residues on each side to compute the score for residue i.
+        The score for residue i is the sum of the scaled values for these
+        amino acids, optionally weighted according to their position in the
+        window.
+
+        Edge: The central amino acid of the window always has a weight of 1.
+        By default, the amino acids at the remaining window positions have the
+        same weight, but you can make the residue at the center of the window
+        have a larger weight than the others by setting the edge value for the
+        residues at the beginning and end of the interval to a value between
+        0 and 1. For instance, for Edge=0.4 and a window size of 5 the weights
+        will be: 0.4, 0.7, 1.0, 0.7, 0.4.
+
+        The method returns a list of values which can be plotted to view the
+        change along a protein sequence.  Many scales exist. Just add your
+        favorites to the ProtParamData modules.
+
+        Similar to ExPASy's ProtScale:
+        http://www.expasy.org/cgi-bin/protscale.pl
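+
+        A minimal usage sketch (illustrative sequence; the resulting profile
+        has length - window + 1 entries):
+
+        >>> from Bio.SeqUtils.ProtParam import ProteinAnalysis
+        >>> from Bio.SeqUtils import ProtParamData
+        >>> pa = ProteinAnalysis("MAEGEITTFTALTEKFNLPPGNYKK")
+        >>> profile = pa.protein_scale(ProtParamData.kd, window=9, edge=0.4)
+        >>> len(profile) == len("MAEGEITTFTALTEKFNLPPGNYKK") - 9 + 1
+        True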
+        """
+        # generate the weights
+        #   _weight_list returns only one tail. If the list should be
+        #   [0.4,0.7,1.0,0.7,0.4] what you actually get from _weights_list
+        #   is [0.4,0.7]. The correct calculation is done in the loop.
+        weights = self._weight_list(window, edge)
+        scores = []
+
+        # the score in each Window is divided by the sum of weights
+        # (* 2 + 1) since the weight list is one sided:
+        sum_of_weights = sum(weights) * 2 + 1
+
+        for i in range(self.length - window + 1):
+            subsequence = self.sequence[i : i + window]
+            score = 0.0
+
+            for j in range(window // 2):
+                # walk from the outside of the Window towards the middle.
+                # Iddo: try/except clauses added to avoid raising an exception
+                # on a non-standard amino acid
+                try:
+                    front = param_dict[subsequence[j]]
+                    back = param_dict[subsequence[window - j - 1]]
+                    score += weights[j] * front + weights[j] * back
+                except KeyError:
+                    sys.stderr.write(
+                        "warning: %s or %s is not a standard "
+                        "amino acid.\n" % (subsequence[j], subsequence[window - j - 1])
+                    )
+
+            # Now add the middle value, which always has a weight of 1.
+            middle = subsequence[window // 2]
+            if middle in param_dict:
+                score += param_dict[middle]
+            else:
+                sys.stderr.write(
+                    "warning: %s  is not a standard amino acid.\n" % middle
+                )
+
+            scores.append(score / sum_of_weights)
+
+        return scores
+
+    def isoelectric_point(self):
+        """Calculate the isoelectric point.
+
+        Uses the module IsoelectricPoint to calculate the pI of a protein.
+        """
+        aa_content = self.count_amino_acids()
+
+        ie_point = IsoelectricPoint.IsoelectricPoint(self.sequence, aa_content)
+        return ie_point.pi()
+
+    def charge_at_pH(self, pH):
+        """Calculate the charge of a protein at given pH."""
+        aa_content = self.count_amino_acids()
+        charge = IsoelectricPoint.IsoelectricPoint(self.sequence, aa_content)
+        return charge.charge_at_pH(pH)
+
+    def secondary_structure_fraction(self):
+        """Calculate fraction of helix, turn and sheet.
+
+        Returns a list of the fraction of amino acids which tend
+        to be in Helix, Turn or Sheet.
+
+        Amino acids in helix: V, I, Y, F, W, L.
+        Amino acids in Turn: N, P, G, S.
+        Amino acids in sheet: E, M, A, L.
+
+        Returns a tuple of three floats (Helix, Turn, Sheet).
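+
+        For example (V counts toward helix, N toward turn, and neither
+        toward sheet):
+
+        >>> from Bio.SeqUtils.ProtParam import ProteinAnalysis
+        >>> ProteinAnalysis("VN").secondary_structure_fraction()
+        (0.5, 0.5, 0.0)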
+        """
+        aa_percentages = self.get_amino_acids_percent()
+
+        helix = sum(aa_percentages[r] for r in "VIYFWL")
+        turn = sum(aa_percentages[r] for r in "NPGS")
+        sheet = sum(aa_percentages[r] for r in "EMAL")
+
+        return helix, turn, sheet
+
+    def molar_extinction_coefficient(self):
+        """Calculate the molar extinction coefficient.
+
+        Calculates the molar extinction coefficient assuming cysteines
+        (reduced) and cystine residues (Cys-Cys-bond).
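+
+        For example, two Trp and two Cys (one potential cystine bond):
+
+        >>> from Bio.SeqUtils.ProtParam import ProteinAnalysis
+        >>> ProteinAnalysis("WWCC").molar_extinction_coefficient()
+        (11000, 11125)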
+        """
+        num_aa = self.count_amino_acids()
+        mec_reduced = num_aa["W"] * 5500 + num_aa["Y"] * 1490
+        mec_cystines = mec_reduced + (num_aa["C"] // 2) * 125
+        return (mec_reduced, mec_cystines)
+
+
+if __name__ == "__main__":
+    from Bio._utils import run_doctest
+
+    run_doctest()
diff --git a/code/lib/Bio/SeqUtils/ProtParamData.py b/code/lib/Bio/SeqUtils/ProtParamData.py
new file mode 100644
index 0000000..59f9ce8
--- /dev/null
+++ b/code/lib/Bio/SeqUtils/ProtParamData.py
@@ -0,0 +1,390 @@
+# Copyright 2003 Yair Benita.  All rights reserved.
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Indices to be used with ProtParam."""
+
+# Turn black code style off
+# fmt: off
+
+# Hydrophobicity
+
+# Kyte & Doolittle index of hydrophobicity
+# J. Mol. Biol. 157:105-132(1982).
+# "KyteDoolittle"
+kd = {"A": 1.8, "R": -4.5, "N": -3.5, "D": -3.5, "C": 2.5,
+      "Q": -3.5, "E": -3.5, "G": -0.4, "H": -3.2, "I": 4.5,
+      "L": 3.8, "K": -3.9, "M": 1.9, "F": 2.8, "P": -1.6,
+      "S": -0.8, "T": -0.7, "W": -0.9, "Y": -1.3, "V": 4.2}
+
+# Aboderin hydrophobicity index
+# International J. of Biochemistry, 2(11), 537-544.
+# "Aboderin"
+ab = {"A": 5.1, "R": 2.0, "N": 0.6, "D": 0.7, "C": 0.0,
+      "Q": 1.4, "E": 1.8, "G": 4.1, "H": 1.6, "I": 9.3,
+      "L": 10.0, "K": 1.3, "M": 8.7, "F": 9.6, "P": 4.9,
+      "S": 3.1, "T": 3.5, "W": 9.2, "Y": 8.0, "V": 8.5}
+
+# Abraham & Leo hydrophobicity index
+# Proteins: Structure, Function and Genetics 2:130-152(1987).
+# "AbrahamLeo"
+al = {"A": 0.44, "R": -2.42, "N": -1.32, "D": -0.31, "C": 0.58,
+      "Q": -0.71, "E": -0.34, "G": 0.0, "H": -0.01, "I": 2.46,
+      "L": 2.46, "K": -2.45, "M": 1.1, "F": 2.54, "P": 1.29,
+      "S": -0.84, "T": -0.41, "W": 2.56, "Y": 1.63, "V": 1.73}
+
+# Argos hydrophobicity index
+# European Journal of Biochemistry, 128(2-3), 565-575.
+# "Argos"
+ag = {"A": 0.61, "R": 0.6, "N": 0.06, "D": 0.46, "C": 1.07,
+      "Q": 0.0, "E": 0.47, "G": 0.07, "H": 0.61, "I": 2.22,
+      "L": 1.53, "K": 1.15, "M": 1.18, "F": 2.02, "P": 1.95,
+      "S": 0.05, "T": 0.05, "W": 2.65, "Y": 1.88, "V": 1.32}
+
+# Black & Mould hydrophobicity index
+# Anal. Biochem. 193:72-82(1991).
+# "BlackMould"
+bm = {"A": 0.616, "R": 0.0, "N": 0.236, "D": 0.028, "C": 0.68,
+      "Q": 0.251, "E": 0.043, "G": 0.501, "H": 0.165, "I": 0.943,
+      "L": 0.943, "K": 0.283, "M": 0.738, "F": 1.0, "P": 0.711,
+      "S": 0.359, "T": 0.45, "W": 0.878, "Y": 0.88, "V": 0.825}
+
+# Bull & Breese hydrophobicity index
+# Arch. Biochem. Biophys. 161:665-670(1974)
+# "BullBreese"
+bb = {"A": 0.61, "R": 0.69, "N": 0.89, "D": 0.61, "C": 0.36,
+      "Q": 0.97, "E": 0.51, "G": 0.81, "H": 0.69, "I": -1.45,
+      "L": -1.65, "K": 0.46, "M": -0.66, "F": -1.52, "P": -0.17,
+      "S": 0.42, "T": 0.29, "W": -1.2, "Y": -1.43, "V": -0.75}
+
+# Casari & Sippl hydrophobic potential
+# Journal of molecular biology, 224(3), 725-732.
+# "Casari"
+cs = {"A": 0.2, "R": -0.7, "N": -0.5, "D": -1.4, "C": 1.9,
+      "Q": -1.1, "E": -1.3, "G": -0.1, "H": 0.4, "I": 1.4,
+      "L": 0.5, "K": -1.6, "M": 0.5, "F": 1.0, "P": -1.0,
+      "S": -0.7, "T": -0.4, "W": 1.6, "Y": 0.5, "V": 0.7}
+
+# Cid hydrophobicity index
+# Protein engineering, 5(5), 373-375.
+# "Cid"
+ci = {"A": 0.02, "R": -0.42, "N": -0.77, "D": -1.04, "C": 0.77,
+      "Q": -1.1, "E": -1.14, "G": -0.8, "H": 0.26, "I": 1.81,
+      "L": 1.14, "K": -0.41, "M": 1.0, "F": 1.35, "P": -0.09,
+      "S": -0.97, "T": -0.77, "W": 1.71, "Y": 1.11, "V": 1.13}
+
+# Cowan hydrophobicity indices at ph 3.4 and 7.5
+# Peptide Research 3:75-80(1990).
+# "Cowan3.4" "Conwan7.5"
+cw = {3.4 : {"A": 0.42, "R": -1.56, "N": -1.03, "D": -0.51, "C": 0.84,
+             "Q": -0.96, "E": -0.37, "G": 0.0, "H": -2.28, "I": 1.81,
+             "L": 1.8, "K": -2.03, "M": 1.18, "F": 1.74, "P": 0.86,
+             "S": -0.64, "T": -0.26, "W": 1.46, "Y": 0.51, "V": 1.34},
+      7.5 : {"A": 0.35, "R": -1.5, "N": -0.99, "D": -2.15, "C": 0.76,
+             "Q": -0.93, "E": -1.95, "G": 0.0, "H": -0.65, "I": 1.83,
+             "L": 1.8, "K": -1.54, "M": 1.1, "F": 1.69, "P": 0.84,
+             "S": -0.63, "T": -0.27, "W": 1.35, "Y": 0.39, "V": 1.32}
+      }
+
+# Eisenberg Normalized consensus hydrophobicity scale
+# J. Mol. Biol. 179:125-142(1984)
+# "Eisenberg"
+es = {"A": 0.62, "R": -2.53, "N": -0.78, "D": -0.9, "C": 0.29,
+      "Q": -0.85, "E": -0.74, "G": 0.48, "H": -0.4, "I": 1.38,
+      "L": 1.06, "K": -1.5, "M": 0.64, "F": 1.19, "P": 0.12,
+      "S": -0.18, "T": -0.05, "W": 0.81, "Y": 0.26, "V": 1.08}
+
+# Engelman Hydrophobic Transfer Free Energies
+# Annual review of biophysics and biophysical chemistry, 15(1), 321-353.
+# "Engelman"
+eg = {"A": -1.6, "R": 12.3, "N": 4.8, "D": 9.2, "C": -2,
+      "Q": 4.1, "E": 8.2, "G": -1, "H": 3, "I": -3.1,
+      "L": -2.8, "K": 8.8, "M": -3.4, "F": -3.7, "P": 0.2,
+      "S": -0.6, "T": -1.2, "W": -1.9, "Y": 0.7, "V": -2.6}
+
+# Fasman hydrophobicity index
+# (1989). Prediction of protein structure and the principles of protein conformation. Springer.
+# "Fasman"
+fs = {"A": -0.21, "R": 2.11, "N": 0.96, "D": 1.36, "C": -6.04,
+      "Q": 1.52, "E": 2.3, "G": 0, "H": -1.23, "I": -4.81,
+      "L": -4.68, "K": 3.88, "M": -3.66, "F": -4.65, "P": 0.75,
+      "S": 1.74, "T": 0.78, "W": -3.32, "Y": -1.01, "V": -3.5}
+
+# Fauchere Hydrophobicity scale
+# Eur. J. Med. Chem. 18:369-375(1983).
+# "Fauchere"
+fc = {"A": 0.31, "R": -1.01, "N": -0.6, "D": -0.77, "C": 1.54,
+      "Q": -0.22, "E": -0.64, "G": 0, "H": 0.13, "I": 1.8,
+      "L": 1.7, "K": -0.99, "M": 1.23, "F": 1.79, "P": 0.72,
+      "S": -0.04, "T": 0.26, "W": 2.25, "Y": 0.96, "V": 1.22}
+
+# Goldsack & Chalifoux Free Energy of Mixing of the Hydrophobic Side Chains
+# Journal of theoretical biology, 39(3), 645-651.
+# "Goldsack"
+gd = {"A": 0.75, "R": 0.75, "N": 0.69, "D": 0, "C": 1,
+      "Q": 0.59, "E": 0, "G": 0, "H": 0, "I": 2.95,
+      "L": 2.4, "K": 1.5, "M": 1.3, "F": 2.65, "P": 2.6,
+      "S": 0, "T": 0.45, "W": 3, "Y": 2.85, "V": 1.7}
+
+# Guy Hydrophobicity scale based on free energy of transfer (kcal/mole).
+# Biophys J. 47:61-70(1985)
+# "Guy"
+gy = {"A": 0.1, "R": 1.91, "N": 0.48, "D": 0.78, "C": -1.42,
+      "Q": 0.95, "E": 0.83, "G": 0.33, "H": -0.5, "I": -1.13,
+      "L": -1.18, "K": 1.4, "M": -1.59, "F": -2.12, "P": 0.73,
+      "S": 0.52, "T": 0.07, "W": -0.51, "Y": -0.21, "V": -1.27}
+
+# Jones Hydrophobicity scale
+# Journal of theoretical biology, 50(1), 167-183.
+# "Jones"
+jo = {"A": 0.87, "R": 0.85, "N": 0.09, "D": 0.66, "C": 1.52,
+      "Q": 0, "E": 0.67, "G": 0.1, "H": 0.87, "I": 3.15,
+      "L": 2.17, "K": 1.64, "M": 1.67, "F": 2.87, "P": 2.77,
+      "S": 0.07, "T": 0.07, "W": 3.77, "Y": 2.67, "V": 1.87}
+
+# Juretic Hydrophobicity scale
+# Theoretical and computational chemistry, 5, 405-445.
+# "Juretic"
+ju = {"A": 1.1, "R": -5.1, "N": -3.5, "D": -3.6, "C": 2.5,
+      "Q": -3.68, "E": -3.2, "G": -0.64, "H": -3.2, "I": 4.5,
+      "L": 3.8, "K": -4.11, "M": 1.9, "F": 2.8, "P": -1.9,
+      "S": -0.5, "T": -0.7, "W": -0.46, "Y": -1.3, "V": 4.2}
+
+# Kidera Hydrophobicity Factors
+# Journal of Protein Chemistry, 4(1), 23-55.
+# "Kidera"
+ki = {"A": -0.27, "R": 1.87, "N": 0.81, "D": 0.81, "C": -1.05,
+      "Q": 1.1, "E": 1.17, "G": -0.16, "H": 0.28, "I": -0.77,
+      "L": -1.1, "K": 1.7, "M": -0.73, "F": -1.43, "P": -0.75,
+      "S": 0.42, "T": 0.63, "W": -1.57, "Y": -0.56, "V": -0.4}
+
+# Miyazawa Hydrophobicity scale (contact energy derived from 3D data)
+# Macromolecules 18:534-552(1985)
+# "Miyazawa"
+mi = {"A": 5.33, "R": 4.18, "N": 3.71, "D": 3.59, "C": 7.93,
+      "Q": 3.87, "E": 3.65, "G": 4.48, "H": 5.1, "I": 8.83,
+      "L": 8.47, "K": 2.95, "M": 8.95, "F": 9.03, "P": 3.87,
+      "S": 4.09, "T": 4.49, "W": 7.66, "Y": 5.89, "V": 7.63}
+
+# Parker Hydrophilicity scale derived from HPLC peptide retention times
+# Biochemistry 25:5425-5431(1986)
+# "Parker"
+pa = {"A": 2.1, "R": 4.2, "N": 7, "D": 10, "C": 1.4,
+      "Q": 6, "E": 7.8, "G": 5.7, "H": 2.1, "I": -8,
+      "L": -9.2, "K": 5.7, "M": -4.2, "F": -9.2, "P": 2.1,
+      "S": 6.5, "T": 5.2, "W": -10, "Y": -1.9, "V": -3.7}
+
+# Ponnuswamy Hydrophobic characteristics of folded proteins
+# Progress in biophysics and molecular biology, 59(1), 57-103.
+# "Ponnuswamy"
+po = {"A": 0.85, "R": 0.2, "N": -0.48, "D": -1.1, "C": 2.1,
+      "Q": -0.42, "E": -0.79, "G": 0, "H": 0.22, "I": 3.14,
+      "L": 1.99, "K": -1.19, "M": 1.42, "F": 1.69, "P": -1.14,
+      "S": -0.52, "T": -0.08, "W": 1.76, "Y": 1.37, "V": 2.53}
+
+# Rose Hydrophobicity scale
+# Science 229:834-838(1985)
+# "Rose"
+ro = {"A": 0.74, "R": 0.64, "N": 0.63, "D": 0.62, "C": 0.91,
+      "Q": 0.62, "E": 0.62, "G": 0.72, "H": 0.78, "I": 0.88,
+      "L": 0.85, "K": 0.52, "M": 0.85, "F": 0.88, "P": 0.64,
+      "S": 0.66, "T": 0.7, "W": 0.85, "Y": 0.76, "V": 0.86}
+
+# Roseman Hydrophobicity scale
+# J. Mol. Biol. 200:513-522(1988)
+# "Roseman"
+rm = {"A": 0.39, "R": -3.95, "N": -1.91, "D": -3.81, "C": 0.25,
+      "Q": -1.3, "E": -2.91, "G": 0, "H": -0.64, "I": 1.82,
+      "L": 1.82, "K": -2.77, "M": 0.96, "F": 2.27, "P": 0.99,
+      "S": -1.24, "T": -1, "W": 2.13, "Y": 1.47, "V": 1.3}
+
+# Sweet Optimized Matching Hydrophobicity (OMH)
+# J. Mol. Biol. 171:479-488(1983).
+# "Sweet"
+sw = {"A": -0.4, "R": -0.59, "N": -0.92, "D": -1.31, "C": 0.17,
+      "Q": -0.91, "E": -1.22, "G": -0.67, "H": -0.64, "I": 1.25,
+      "L": 1.22, "K": -0.67, "M": 1.02, "F": 1.92, "P": -0.49,
+      "S": -0.55, "T": -0.28, "W": 0.5, "Y": 1.67, "V": 0.91}
+
+# Tanford Hydrophobicity scale
+# J. Am. Chem. Soc. 84:4240-4274(1962)
+# "Tanford"
+ta = {"A": 0.62, "R": -2.53, "N": -0.78, "D": -0.09, "C": 0.29,
+      "Q": -0.85, "E": -0.74, "G": 0.48, "H": -0.4, "I": 1.38,
+      "L": 1.53, "K": -1.5, "M": 0.64, "F": 1.19, "P": 0.12,
+      "S": -0.18, "T": -0.05, "W": 0.81, "Y": 0.26, "V": 1.8}
+
+# Wilson Hydrophobic constants derived from HPLC peptide retention times
+# Biochem. J. 199:31-41(1981)
+# "Wilson"
+wi = {"A": -0.3, "R": -1.1, "N": -0.2, "D": -1.4, "C": 6.3,
+      "Q": -0.2, "E": 0, "G": 1.2, "H": -1.3, "I": 4.3,
+      "L": 6.6, "K": -3.6, "M": 2.5, "F": 7.5, "P": 2.2,
+      "S": -0.6, "T": -2.2, "W": 7.9, "Y": 7.1, "V": 5.9}
+
+# Zimmerman Hydrophobicity scale
+# Journal of theoretical biology, 21(2), 170-201.
+# "Zimmerman"
+zi = {"A": 0.83, "R": 0.83, "N": 0.09, "D": 0.64, "C": 1.48,
+      "Q": 0, "E": 0.65, "G": 0.1, "H": 1.1, "I": 3.07,
+      "L": 2.52, "K": 1.6, "M": 1.4, "F": 2.75, "P": 2.7,
+      "S": 0.14, "T": 0.54, "W": 0.31, "Y": 2.97, "V": 1.79}
+
+gravy_scales = {"KyteDoolitle": kd, "Aboderin": ab,
+                "AbrahamLeo": al, "Argos": ag,
+                "BlackMould": bm, "BullBreese": bb,
+                "Casari": cs, "Cid": ci,
+                "Cowan3.4": cw[3.4], "Cowan7.5": cw[7.5],
+                "Eisenberg": es, "Engelman": eg,
+                "Fasman": fs, "Fauchere": fc,
+                "GoldSack": gd, "Guy": gy,
+                "Jones": jo, "Juretic": ju,
+                "Kidera": ki, "Miyazawa": mi,
+                "Parker": pa, "Ponnuswamy": po,
+                "Rose": ro, "Roseman": rm,
+                "Sweet": sw, "Tanford": ta,
+                "Wilson": wi, "Zimmerman": zi}
+
+
+# Flexibility
+# Normalized flexibility parameters (B-values), average
+# Vihinen M., Torkkila E., Riikonen P. Proteins. 19(2):141-9(1994).
+Flex = {"A": 0.984, "C": 0.906, "E": 1.094, "D": 1.068,
+        "G": 1.031, "F": 0.915, "I": 0.927, "H": 0.950,
+        "K": 1.102, "M": 0.952, "L": 0.935, "N": 1.048,
+        "Q": 1.037, "P": 1.049, "S": 1.046, "R": 1.008,
+        "T": 0.997, "W": 0.904, "V": 0.931, "Y": 0.929}
+
+# Hydrophilicity
+# 1 Hopp & Wood
+# Proc. Natl. Acad. Sci. U.S.A. 78:3824-3828(1981).
+hw = {"A": -0.5, "R": 3.0, "N": 0.2, "D": 3.0, "C": -1.0,
+      "Q": 0.2, "E": 3.0, "G": 0.0, "H": -0.5, "I": -1.8,
+      "L": -1.8, "K": 3.0, "M": -1.3, "F": -2.5, "P": 0.0,
+      "S": 0.3, "T": -0.4, "W": -3.4, "Y": -2.3, "V": -1.5}
+
+# Surface accessibility
+# Vergoten G & Theophanides T, Biomolecular Structure and Dynamics,
+# pg.138 (1997).
+# 1 Emini Surface fractional probability
+em = {"A": 0.815, "R": 1.475, "N": 1.296, "D": 1.283, "C": 0.394,
+      "Q": 1.348, "E": 1.445, "G": 0.714, "H": 1.180, "I": 0.603,
+      "L": 0.603, "K": 1.545, "M": 0.714, "F": 0.695, "P": 1.236,
+      "S": 1.115, "T": 1.184, "W": 0.808, "Y": 1.089, "V": 0.606}
+
+# 2 Janin Interior to surface transfer energy scale
+ja = {"A": 0.28, "R": -1.14, "N": -0.55, "D": -0.52, "C": 0.97,
+      "Q": -0.69, "E": -1.01, "G": 0.43, "H": -0.31, "I": 0.60,
+      "L": 0.60, "K": -1.62, "M": 0.43, "F": 0.46, "P": -0.42,
+      "S": -0.19, "T": -0.32, "W": 0.29, "Y": -0.15, "V": 0.60}
+
+
+# A two dimensional dictionary for calculating the instability index.
+# Guruprasad K., Reddy B.V.B., Pandit M.W. Protein Engineering 4:155-161(1990).
+# It is based on dipeptide values; therefore, the value for the dipeptide DG
+# is DIWV['D']['G'].
+DIWV = {"A": {"A": 1.0, "C": 44.94, "E": 1.0, "D": -7.49,
+              "G": 1.0, "F": 1.0, "I": 1.0, "H": -7.49,
+              "K": 1.0, "M": 1.0, "L": 1.0, "N": 1.0,
+              "Q": 1.0, "P": 20.26, "S": 1.0, "R": 1.0,
+              "T": 1.0, "W": 1.0, "V": 1.0, "Y": 1.0},
+        "C": {"A": 1.0, "C": 1.0, "E": 1.0, "D": 20.26,
+              "G": 1.0, "F": 1.0, "I": 1.0, "H": 33.60,
+              "K": 1.0, "M": 33.60, "L": 20.26, "N": 1.0,
+              "Q": -6.54, "P": 20.26, "S": 1.0, "R": 1.0,
+              "T": 33.60, "W": 24.68, "V": -6.54, "Y": 1.0},
+        "E": {"A": 1.0, "C": 44.94, "E": 33.60, "D": 20.26,
+              "G": 1.0, "F": 1.0, "I": 20.26, "H": -6.54,
+              "K": 1.0, "M": 1.0, "L": 1.0, "N": 1.0,
+              "Q": 20.26, "P": 20.26, "S": 20.26, "R": 1.0,
+              "T": 1.0, "W": -14.03, "V": 1.0, "Y": 1.0},
+        "D": {"A": 1.0, "C": 1.0, "E": 1.0, "D": 1.0,
+              "G": 1.0, "F": -6.54, "I": 1.0, "H": 1.0,
+              "K": -7.49, "M": 1.0, "L": 1.0, "N": 1.0,
+              "Q": 1.0, "P": 1.0, "S": 20.26, "R": -6.54,
+              "T": -14.03, "W": 1.0, "V": 1.0, "Y": 1.0},
+        "G": {"A": -7.49, "C": 1.0, "E": -6.54, "D": 1.0,
+              "G": 13.34, "F": 1.0, "I": -7.49, "H": 1.0,
+              "K": -7.49, "M": 1.0, "L": 1.0, "N": -7.49,
+              "Q": 1.0, "P": 1.0, "S": 1.0, "R": 1.0,
+              "T": -7.49, "W": 13.34, "V": 1.0, "Y": -7.49},
+        "F": {"A": 1.0, "C": 1.0, "E": 1.0, "D": 13.34,
+              "G": 1.0, "F": 1.0, "I": 1.0, "H": 1.0,
+              "K": -14.03, "M": 1.0, "L": 1.0, "N": 1.0,
+              "Q": 1.0, "P": 20.26, "S": 1.0, "R": 1.0,
+              "T": 1.0, "W": 1.0, "V": 1.0, "Y": 33.601},
+        "I": {"A": 1.0, "C": 1.0, "E": 44.94, "D": 1.0,
+              "G": 1.0, "F": 1.0, "I": 1.0, "H": 13.34,
+              "K": -7.49, "M": 1.0, "L": 20.26, "N": 1.0,
+              "Q": 1.0, "P": -1.88, "S": 1.0, "R": 1.0,
+              "T": 1.0, "W": 1.0, "V": -7.49, "Y": 1.0},
+        "H": {"A": 1.0, "C": 1.0, "E": 1.0, "D": 1.0,
+              "G": -9.37, "F": -9.37, "I": 44.94, "H": 1.0,
+              "K": 24.68, "M": 1.0, "L": 1.0, "N": 24.68,
+              "Q": 1.0, "P": -1.88, "S": 1.0, "R": 1.0,
+              "T": -6.54, "W": -1.88, "V": 1.0, "Y": 44.94},
+        "K": {"A": 1.0, "C": 1.0, "E": 1.0, "D": 1.0,
+              "G": -7.49, "F": 1.0, "I": -7.49, "H": 1.0,
+              "K": 1.0, "M": 33.60, "L": -7.49, "N": 1.0,
+              "Q": 24.64, "P": -6.54, "S": 1.0, "R": 33.60,
+              "T": 1.0, "W": 1.0, "V": -7.49, "Y": 1.0},
+        "M": {"A": 13.34, "C": 1.0, "E": 1.0, "D": 1.0,
+              "G": 1.0, "F": 1.0, "I": 1.0, "H": 58.28,
+              "K": 1.0, "M": -1.88, "L": 1.0, "N": 1.0,
+              "Q": -6.54, "P": 44.94, "S": 44.94, "R": -6.54,
+              "T": -1.88, "W": 1.0, "V": 1.0, "Y": 24.68},
+        "L": {"A": 1.0, "C": 1.0, "E": 1.0, "D": 1.0,
+              "G": 1.0, "F": 1.0, "I": 1.0, "H": 1.0,
+              "K": -7.49, "M": 1.0, "L": 1.0, "N": 1.0,
+              "Q": 33.60, "P": 20.26, "S": 1.0, "R": 20.26,
+              "T": 1.0, "W": 24.68, "V": 1.0, "Y": 1.0},
+        "N": {"A": 1.0, "C": -1.88, "E": 1.0, "D": 1.0,
+              "G": -14.03, "F": -14.03, "I": 44.94, "H": 1.0,
+              "K": 24.68, "M": 1.0, "L": 1.0, "N": 1.0,
+              "Q": -6.54, "P": -1.88, "S": 1.0, "R": 1.0,
+              "T": -7.49, "W": -9.37, "V": 1.0, "Y": 1.0},
+        "Q": {"A": 1.0, "C": -6.54, "E": 20.26, "D": 20.26,
+              "G": 1.0, "F": -6.54, "I": 1.0, "H": 1.0,
+              "K": 1.0, "M": 1.0, "L": 1.0, "N": 1.0,
+              "Q": 20.26, "P": 20.26, "S": 44.94, "R": 1.0,
+              "T": 1.0, "W": 1.0, "V": -6.54, "Y": -6.54},
+        "P": {"A": 20.26, "C": -6.54, "E": 18.38, "D": -6.54,
+              "G": 1.0, "F": 20.26, "I": 1.0, "H": 1.0,
+              "K": 1.0, "M": -6.54, "L": 1.0, "N": 1.0,
+              "Q": 20.26, "P": 20.26, "S": 20.26, "R": -6.54,
+              "T": 1.0, "W": -1.88, "V": 20.26, "Y": 1.0},
+        "S": {"A": 1.0, "C": 33.60, "E": 20.26, "D": 1.0,
+              "G": 1.0, "F": 1.0, "I": 1.0, "H": 1.0,
+              "K": 1.0, "M": 1.0, "L": 1.0, "N": 1.0,
+              "Q": 20.26, "P": 44.94, "S": 20.26, "R": 20.26,
+              "T": 1.0, "W": 1.0, "V": 1.0, "Y": 1.0},
+        "R": {"A": 1.0, "C": 1.0, "E": 1.0, "D": 1.0,
+              "G": -7.49, "F": 1.0, "I": 1.0, "H": 20.26,
+              "K": 1.0, "M": 1.0, "L": 1.0, "N": 13.34,
+              "Q": 20.26, "P": 20.26, "S": 44.94, "R": 58.28,
+              "T": 1.0, "W": 58.28, "V": 1.0, "Y": -6.54},
+        "T": {"A": 1.0, "C": 1.0, "E": 20.26, "D": 1.0,
+              "G": -7.49, "F": 13.34, "I": 1.0, "H": 1.0,
+              "K": 1.0, "M": 1.0, "L": 1.0, "N": -14.03,
+              "Q": -6.54, "P": 1.0, "S": 1.0, "R": 1.0,
+              "T": 1.0, "W": -14.03, "V": 1.0, "Y": 1.0},
+        "W": {"A": -14.03, "C": 1.0, "E": 1.0, "D": 1.0,
+              "G": -9.37, "F": 1.0, "I": 1.0, "H": 24.68,
+              "K": 1.0, "M": 24.68, "L": 13.34, "N": 13.34,
+              "Q": 1.0, "P": 1.0, "S": 1.0, "R": 1.0,
+              "T": -14.03, "W": 1.0, "V": -7.49, "Y": 1.0},
+        "V": {"A": 1.0, "C": 1.0, "E": 1.0, "D": -14.03,
+              "G": -7.49, "F": 1.0, "I": 1.0, "H": 1.0,
+              "K": -1.88, "M": 1.0, "L": 1.0, "N": 1.0,
+              "Q": 1.0, "P": 20.26, "S": 1.0, "R": 1.0,
+              "T": -7.49, "W": 1.0, "V": 1.0, "Y": -6.54},
+        "Y": {"A": 24.68, "C": 1.0, "E": -6.54, "D": 24.68,
+              "G": -7.49, "F": 1.0, "I": 1.0, "H": 13.34,
+              "K": 1.0, "M": 44.94, "L": 1.0, "N": 1.0,
+              "Q": 1.0, "P": 13.34, "S": 1.0, "R": -15.91,
+              "T": -7.49, "W": -9.37, "V": 1.0, "Y": 13.34},
+        }
+
+# Turn black code style on
+# fmt: on
diff --git a/code/lib/Bio/SeqUtils/__init__.py b/code/lib/Bio/SeqUtils/__init__.py
new file mode 100644
index 0000000..9ea58d8
--- /dev/null
+++ b/code/lib/Bio/SeqUtils/__init__.py
@@ -0,0 +1,467 @@
+#!/usr/bin/env python
+# Copyright 2002 by Thomas Sicheritz-Ponten and Cecilia Alsmark.
+# Revisions copyright 2014 by Markus Piotrowski.
+# Revisions copyright 2014-2016 by Peter Cock.
+# All rights reserved.
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Miscellaneous functions for dealing with sequences."""
+
+
+import re
+from math import pi, sin, cos
+
+from Bio.Seq import Seq
+from Bio.Data import IUPACData
+
+
+######################################
+# DNA
+######################
+# {{{
+
+
+def GC(seq):
+    """Calculate G+C content, return percentage (as float between 0 and 100).
+
+    Copes with mixed case sequences, and with the ambiguous nucleotide S (G or C)
+    when counting the G and C content.  The percentage is calculated against
+    the full length, e.g.:
+
+    >>> from Bio.SeqUtils import GC
+    >>> GC("ACTGN")
+    40.0
+
+    Note that this will return zero for an empty sequence.
+    """
+    gc = sum(seq.count(x) for x in ["G", "C", "g", "c", "S", "s"])
+    try:
+        return gc * 100.0 / len(seq)
+    except ZeroDivisionError:
+        return 0.0
+
+
+def GC123(seq):
+    """Calculate G+C content: total, for first, second and third positions.
+
+    Returns a tuple of four floats (percentages between 0 and 100) for the
+    entire sequence, and the three codon positions.  e.g.
+
+    >>> from Bio.SeqUtils import GC123
+    >>> GC123("ACTGTN")
+    (40.0, 50.0, 50.0, 0.0)
+
+    Copes with mixed case sequences, but does NOT deal with ambiguous
+    nucleotides.
+    """
+    d = {}
+    for nt in ["A", "T", "G", "C"]:
+        d[nt] = [0, 0, 0]
+
+    for i in range(0, len(seq), 3):
+        codon = seq[i : i + 3]
+        if len(codon) < 3:
+            codon += "  "
+        for pos in range(0, 3):
+            for nt in ["A", "T", "G", "C"]:
+                if codon[pos] == nt or codon[pos] == nt.lower():
+                    d[nt][pos] += 1
+    gc = {}
+    gcall = 0
+    nall = 0
+    for i in range(0, 3):
+        try:
+            n = d["G"][i] + d["C"][i] + d["T"][i] + d["A"][i]
+            gc[i] = (d["G"][i] + d["C"][i]) * 100.0 / n
+        except ZeroDivisionError:  # n is zero, e.g. for an empty sequence
+            gc[i] = 0
+
+        gcall = gcall + d["G"][i] + d["C"][i]
+        nall = nall + n
+
+    gcall = 100.0 * gcall / nall
+    return gcall, gc[0], gc[1], gc[2]
+
+
+def GC_skew(seq, window=100):
+    """Calculate GC skew (G-C)/(G+C) for multiple windows along the sequence.
+
+    Returns a list of ratios (floats), controlled by the length of the sequence
+    and the size of the window.
+
+    Returns 0 for windows without any G/C by handling zero division errors.
+
+    Does NOT look at any ambiguous nucleotides.
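+
+    For example, with two windows of six nucleotides each:
+
+    >>> from Bio.SeqUtils import GC_skew
+    >>> GC_skew("GGGGGGCCCCCC", window=6)
+    [1.0, -1.0]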
+    """
+    # 8/19/03: Iddo: added lowercase
+    values = []
+    for i in range(0, len(seq), window):
+        s = seq[i : i + window]
+        g = s.count("G") + s.count("g")
+        c = s.count("C") + s.count("c")
+        try:
+            skew = (g - c) / float(g + c)
+        except ZeroDivisionError:
+            skew = 0.0
+        values.append(skew)
+    return values
+
+
+def xGC_skew(seq, window=1000, zoom=100, r=300, px=100, py=100):
+    """Calculate and plot normal and accumulated GC skew (GRAPHICS !!!)."""
+    import tkinter
+
+    yscroll = tkinter.Scrollbar(orient=tkinter.VERTICAL)
+    xscroll = tkinter.Scrollbar(orient=tkinter.HORIZONTAL)
+    canvas = tkinter.Canvas(
+        yscrollcommand=yscroll.set, xscrollcommand=xscroll.set, background="white"
+    )
+    win = canvas.winfo_toplevel()
+    win.geometry("700x700")
+
+    yscroll.config(command=canvas.yview)
+    xscroll.config(command=canvas.xview)
+    yscroll.pack(side=tkinter.RIGHT, fill=tkinter.Y)
+    xscroll.pack(side=tkinter.BOTTOM, fill=tkinter.X)
+    canvas.pack(fill=tkinter.BOTH, side=tkinter.LEFT, expand=1)
+    canvas.update()
+
+    X0, Y0 = r + px, r + py
+    x1, x2, y1, y2 = X0 - r, X0 + r, Y0 - r, Y0 + r
+
+    ty = Y0
+    canvas.create_text(X0, ty, text="%s...%s (%d nt)" % (seq[:7], seq[-7:], len(seq)))
+    ty += 20
+    canvas.create_text(X0, ty, text="GC %3.2f%%" % (GC(seq)))
+    ty += 20
+    canvas.create_text(X0, ty, text="GC Skew", fill="blue")
+    ty += 20
+    canvas.create_text(X0, ty, text="Accumulated GC Skew", fill="magenta")
+    ty += 20
+    canvas.create_oval(x1, y1, x2, y2)
+
+    acc = 0
+    start = 0
+    for gc in GC_skew(seq, window):
+        r1 = r
+        acc += gc
+        # GC skew
+        alpha = pi - (2 * pi * start) / len(seq)
+        r2 = r1 - gc * zoom
+        x1 = X0 + r1 * sin(alpha)
+        y1 = Y0 + r1 * cos(alpha)
+        x2 = X0 + r2 * sin(alpha)
+        y2 = Y0 + r2 * cos(alpha)
+        canvas.create_line(x1, y1, x2, y2, fill="blue")
+        # accumulated GC skew
+        r1 = r - 50
+        r2 = r1 - acc
+        x1 = X0 + r1 * sin(alpha)
+        y1 = Y0 + r1 * cos(alpha)
+        x2 = X0 + r2 * sin(alpha)
+        y2 = Y0 + r2 * cos(alpha)
+        canvas.create_line(x1, y1, x2, y2, fill="magenta")
+
+        canvas.update()
+        start += window
+
+    canvas.configure(scrollregion=canvas.bbox(tkinter.ALL))
+
+
+def nt_search(seq, subseq):
+    """Search for a DNA subseq in sequence, return list of [subseq, positions].
+
+    Uses ambiguous values (like N = A or T or C or G, R = A or G etc.),
+    and searches only on the forward strand.
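+
+    For example (the first list element is the regular expression pattern
+    built from the subsequence, followed by the 0-based match positions):
+
+    >>> from Bio.SeqUtils import nt_search
+    >>> nt_search("ACTGACTG", "AC")
+    ['AC', 0, 4]
+    >>> nt_search("ACGT", "R")
+    ['[AG]', 0, 2]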
+    """
+    pattern = ""
+    for nt in subseq:
+        value = IUPACData.ambiguous_dna_values[nt]
+        if len(value) == 1:
+            pattern += value
+        else:
+            pattern += "[%s]" % value
+
+    pos = -1
+    result = [pattern]
+    while True:
+        pos += 1
+        s = seq[pos:]
+        m = re.search(pattern, s)
+        if not m:
+            break
+        pos += int(m.start(0))
+        result.append(pos)
+    return result
+
+
+######################################
+# Protein
+######################
+
+
+def seq3(seq, custom_map=None, undef_code="Xaa"):
+    """Convert protein sequence from one-letter to three-letter code.
+
+    The single required input argument 'seq' should be a protein sequence using
+    single letter codes, either as a Python string or as a Seq or MutableSeq
+    object.
+
+    This function returns the amino acid sequence as a string using the three
+    letter amino acid codes. Output follows the IUPAC standard (including
+    ambiguous characters B for "Asx", J for "Xle" and X for "Xaa", and also U
+    for "Sel" and O for "Pyl") plus "Ter" for a terminator given as an
+    asterisk. Any unknown character (including possible gap characters),
+    is changed into 'Xaa' by default.
+
+    e.g.
+
+    >>> from Bio.SeqUtils import seq3
+    >>> seq3("MAIVMGRWKGAR*")
+    'MetAlaIleValMetGlyArgTrpLysGlyAlaArgTer'
+
+    You can set a custom translation of the codon termination code using the
+    dictionary "custom_map" argument (which defaults to {'*': 'Ter'}), e.g.
+
+    >>> seq3("MAIVMGRWKGAR*", custom_map={"*": "***"})
+    'MetAlaIleValMetGlyArgTrpLysGlyAlaArg***'
+
+    You can also set a custom translation for non-amino acid characters, such
+    as '-', using the "undef_code" argument, e.g.
+
+    >>> seq3("MAIVMGRWKGA--R*", undef_code='---')
+    'MetAlaIleValMetGlyArgTrpLysGlyAla------ArgTer'
+
+    If not given, "undef_code" defaults to "Xaa", e.g.
+
+    >>> seq3("MAIVMGRWKGA--R*")
+    'MetAlaIleValMetGlyArgTrpLysGlyAlaXaaXaaArgTer'
+
+    This function was inspired by BioPerl's seq3.
+    """
+    if custom_map is None:
+        custom_map = {"*": "Ter"}
+    # not doing .update() on IUPACData dict with custom_map dict
+    # to preserve its initial state (may be imported in other modules)
+    threecode = dict(
+        list(IUPACData.protein_letters_1to3_extended.items()) + list(custom_map.items())
+    )
+    # We use a default of 'Xaa' for undefined letters
+    # Note this will map '-' to 'Xaa' which may be undesirable!
+    return "".join(threecode.get(aa, undef_code) for aa in seq)
+
+
+def seq1(seq, custom_map=None, undef_code="X"):
+    """Convert protein sequence from three-letter to one-letter code.
+
+    The single required input argument 'seq' should be a protein sequence
+    using three-letter codes, either as a Python string or as a Seq or
+    MutableSeq object.
+
+    This function returns the amino acid sequence as a string using the one
+    letter amino acid codes. Output follows the IUPAC standard (including
+    ambiguous characters "B" for "Asx", "J" for "Xle", "X" for "Xaa", "U" for
+    "Sel", and "O" for "Pyl") plus "*" for a terminator given the "Ter" code.
+    Any unknown character (including possible gap characters), is changed
+    into '-' by default.
+
+    e.g.
+
+    >>> from Bio.SeqUtils import seq1
+    >>> seq1("MetAlaIleValMetGlyArgTrpLysGlyAlaArgTer")
+    'MAIVMGRWKGAR*'
+
+    The input is case insensitive, e.g.
+
+    >>> from Bio.SeqUtils import seq1
+    >>> seq1("METalaIlEValMetGLYArgtRplysGlyAlaARGTer")
+    'MAIVMGRWKGAR*'
+
+    You can set a custom translation of the codon termination code using the
+    dictionary "custom_map" argument (defaulting to {'Ter': '*'}), e.g.
+
+    >>> seq1("MetAlaIleValMetGlyArgTrpLysGlyAla***", custom_map={"***": "*"})
+    'MAIVMGRWKGA*'
+
+    You can also set a custom translation for non-amino acid characters, such
+    as '-', using the "undef_code" argument, e.g.
+
+    >>> seq1("MetAlaIleValMetGlyArgTrpLysGlyAla------ArgTer", undef_code='?')
+    'MAIVMGRWKGA??R*'
+
+    If not given, "undef_code" defaults to "X", e.g.
+
+    >>> seq1("MetAlaIleValMetGlyArgTrpLysGlyAla------ArgTer")
+    'MAIVMGRWKGAXXR*'
+
+    """
+    if custom_map is None:
+        custom_map = {"Ter": "*"}
+    # reverse map of threecode
+    # upper() on all keys to enable caps-insensitive input seq handling
+    onecode = {k.upper(): v for k, v in IUPACData.protein_letters_3to1_extended.items()}
+    # add the given termination codon code and custom maps
+    onecode.update((k.upper(), v) for k, v in custom_map.items())
+    seqlist = [seq[3 * i : 3 * (i + 1)] for i in range(len(seq) // 3)]
+    return "".join(onecode.get(aa.upper(), undef_code) for aa in seqlist)
+
+
+######################################
+# Mixed ???
+######################
+
+
+def molecular_weight(
+    seq, seq_type="DNA", double_stranded=False, circular=False, monoisotopic=False
+):
+    """Calculate the molecular mass of DNA, RNA or protein sequences as float.
+
+    Only unambiguous letters are allowed. Nucleotide sequences are assumed to
+    have a 5' phosphate.
+
+    Arguments:
+     - seq: String or Biopython sequence object.
+     - seq_type: The default is to assume DNA; override this with a string
+       "DNA", "RNA", or "protein".
+     - double_stranded: Calculate the mass for the double stranded molecule?
+     - circular: Is the molecule circular (has no ends)?
+     - monoisotopic: Use the monoisotopic mass tables?
+
+    >>> print("%0.2f" % molecular_weight("AGC"))
+    949.61
+    >>> print("%0.2f" % molecular_weight(Seq("AGC")))
+    949.61
+
+    However, it is better to be explicit - for example with strings:
+
+    >>> print("%0.2f" % molecular_weight("AGC", "DNA"))
+    949.61
+    >>> print("%0.2f" % molecular_weight("AGC", "RNA"))
+    997.61
+    >>> print("%0.2f" % molecular_weight("AGC", "protein"))
+    249.29
+
+    """
+    # Rewritten by Markus Piotrowski, 2014
+
+    seq = "".join(str(seq).split()).upper()  # Do the minimum formatting
+
+    if seq_type == "DNA":
+        if monoisotopic:
+            weight_table = IUPACData.monoisotopic_unambiguous_dna_weights
+        else:
+            weight_table = IUPACData.unambiguous_dna_weights
+    elif seq_type == "RNA":
+        if monoisotopic:
+            weight_table = IUPACData.monoisotopic_unambiguous_rna_weights
+        else:
+            weight_table = IUPACData.unambiguous_rna_weights
+    elif seq_type == "protein":
+        if monoisotopic:
+            weight_table = IUPACData.monoisotopic_protein_weights
+        else:
+            weight_table = IUPACData.protein_weights
+    else:
+        raise ValueError("Allowed seq_types are DNA, RNA or protein, not %r" % seq_type)
+
+    if monoisotopic:
+        water = 18.010565
+    else:
+        water = 18.0153
+
+    try:
+        weight = sum(weight_table[x] for x in seq) - (len(seq) - 1) * water
+        if circular:
+            weight -= water
+    except KeyError as e:
+        raise ValueError(
+            "%s is not a valid unambiguous letter for %s" % (e, seq_type)
+        ) from None
+
+    if seq_type in ("DNA", "RNA") and double_stranded:
+        seq = str(Seq(seq).complement())
+        weight += sum(weight_table[x] for x in seq) - (len(seq) - 1) * water
+        if circular:
+            weight -= water
+    elif seq_type == "protein" and double_stranded:
+        raise ValueError("double-stranded proteins await their discovery")
+
+    return weight
+
+
+def six_frame_translations(seq, genetic_code=1):
+    """Return pretty string showing the 6 frame translations and GC content.
+
+    Nice looking 6 frame translation with GC content - code from xbbtools
+    similar to DNA Striders six-frame translation
+
+    >>> from Bio.SeqUtils import six_frame_translations
+    >>> print(six_frame_translations("AUGGCCAUUGUAAUGGGCCGCUGA"))
+    GC_Frame: a:5 t:0 g:8 c:5 
+    Sequence: auggccauug ... gggccgcuga, 24 nt, 54.17 %GC
+    
+    
+    1/1
+      G  H  C  N  G  P  L
+     W  P  L  *  W  A  A
+    M  A  I  V  M  G  R  *
+    auggccauuguaaugggccgcuga   54 %
+    uaccgguaacauuacccggcgacu
+    A  M  T  I  P  R  Q 
+     H  G  N  Y  H  A  A  S
+      P  W  Q  L  P  G  S
+    
+    
+
+    """  # noqa for pep8 W291 trailing whitespace
+    from Bio.Seq import reverse_complement, translate
+
+    anti = reverse_complement(seq)
+    comp = anti[::-1]
+    length = len(seq)
+    frames = {}
+    for i in range(0, 3):
+        fragment_length = 3 * ((length - i) // 3)
+        frames[i + 1] = translate(seq[i : i + fragment_length], genetic_code)
+        frames[-(i + 1)] = translate(anti[i : i + fragment_length], genetic_code)[::-1]
+
+    # create header
+    if length > 20:
+        short = "%s ... %s" % (seq[:10], seq[-10:])
+    else:
+        short = seq
+    header = "GC_Frame: "
+    for nt in ["a", "t", "g", "c"]:
+        header += "%s:%d " % (nt, seq.count(nt.upper()))
+
+    header += "\nSequence: %s, %d nt, %0.2f %%GC\n\n\n" % (
+        short.lower(),
+        length,
+        GC(seq),
+    )
+    res = header
+
+    for i in range(0, length, 60):
+        subseq = seq[i : i + 60]
+        csubseq = comp[i : i + 60]
+        p = i // 3
+        res += "%d/%d\n" % (i + 1, i / 3 + 1)
+        res += "  " + "  ".join(frames[3][p : p + 20]) + "\n"
+        res += " " + "  ".join(frames[2][p : p + 20]) + "\n"
+        res += "  ".join(frames[1][p : p + 20]) + "\n"
+        # seq
+        res += subseq.lower() + "%5d %%\n" % int(GC(subseq))
+        res += csubseq.lower() + "\n"
+        # - frames
+        res += "  ".join(frames[-2][p : p + 20]) + " \n"
+        res += " " + "  ".join(frames[-1][p : p + 20]) + "\n"
+        res += "  " + "  ".join(frames[-3][p : p + 20]) + "\n\n"
+    return res
+
+
+if __name__ == "__main__":
+    from Bio._utils import run_doctest
+
+    run_doctest()
diff --git a/code/lib/Bio/SeqUtils/__pycache__/CheckSum.cpython-37.pyc b/code/lib/Bio/SeqUtils/__pycache__/CheckSum.cpython-37.pyc
new file mode 100644
index 0000000..40884cc
Binary files /dev/null and b/code/lib/Bio/SeqUtils/__pycache__/CheckSum.cpython-37.pyc differ
diff --git a/code/lib/Bio/SeqUtils/__pycache__/CodonUsage.cpython-37.pyc b/code/lib/Bio/SeqUtils/__pycache__/CodonUsage.cpython-37.pyc
new file mode 100644
index 0000000..f7c7c32
Binary files /dev/null and b/code/lib/Bio/SeqUtils/__pycache__/CodonUsage.cpython-37.pyc differ
diff --git a/code/lib/Bio/SeqUtils/__pycache__/CodonUsageIndices.cpython-37.pyc b/code/lib/Bio/SeqUtils/__pycache__/CodonUsageIndices.cpython-37.pyc
new file mode 100644
index 0000000..c183b23
Binary files /dev/null and b/code/lib/Bio/SeqUtils/__pycache__/CodonUsageIndices.cpython-37.pyc differ
diff --git a/code/lib/Bio/SeqUtils/__pycache__/IsoelectricPoint.cpython-37.pyc b/code/lib/Bio/SeqUtils/__pycache__/IsoelectricPoint.cpython-37.pyc
new file mode 100644
index 0000000..fe9eeea
Binary files /dev/null and b/code/lib/Bio/SeqUtils/__pycache__/IsoelectricPoint.cpython-37.pyc differ
diff --git a/code/lib/Bio/SeqUtils/__pycache__/MeltingTemp.cpython-37.pyc b/code/lib/Bio/SeqUtils/__pycache__/MeltingTemp.cpython-37.pyc
new file mode 100644
index 0000000..ed9a06b
Binary files /dev/null and b/code/lib/Bio/SeqUtils/__pycache__/MeltingTemp.cpython-37.pyc differ
diff --git a/code/lib/Bio/SeqUtils/__pycache__/ProtParam.cpython-37.pyc b/code/lib/Bio/SeqUtils/__pycache__/ProtParam.cpython-37.pyc
new file mode 100644
index 0000000..ae29e36
Binary files /dev/null and b/code/lib/Bio/SeqUtils/__pycache__/ProtParam.cpython-37.pyc differ
diff --git a/code/lib/Bio/SeqUtils/__pycache__/ProtParamData.cpython-37.pyc b/code/lib/Bio/SeqUtils/__pycache__/ProtParamData.cpython-37.pyc
new file mode 100644
index 0000000..b2e06bc
Binary files /dev/null and b/code/lib/Bio/SeqUtils/__pycache__/ProtParamData.cpython-37.pyc differ
diff --git a/code/lib/Bio/SeqUtils/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/SeqUtils/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000..5e08256
Binary files /dev/null and b/code/lib/Bio/SeqUtils/__pycache__/__init__.cpython-37.pyc differ
diff --git a/code/lib/Bio/SeqUtils/__pycache__/lcc.cpython-37.pyc b/code/lib/Bio/SeqUtils/__pycache__/lcc.cpython-37.pyc
new file mode 100644
index 0000000..6e95c51
Binary files /dev/null and b/code/lib/Bio/SeqUtils/__pycache__/lcc.cpython-37.pyc differ
diff --git a/code/lib/Bio/SeqUtils/lcc.py b/code/lib/Bio/SeqUtils/lcc.py
new file mode 100644
index 0000000..dd2a976
--- /dev/null
+++ b/code/lib/Bio/SeqUtils/lcc.py
@@ -0,0 +1,162 @@
+# Copyright 2003, 2007 by Sebastian Bassi. sbassi@genesdigitales.com
+# All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Local Composition Complexity."""
+
+import math
+
+
+def lcc_mult(seq, wsize):
+    """Calculate Local Composition Complexity (LCC) values over sliding window.
+
+    Returns a list of floats, the LCC values for a sliding window over
+    the sequence.
+
+    seq - an unambiguous DNA sequence (a string or Seq object)
+    wsize - window size, integer
+
+    The result is the same as applying lcc_simp multiple times, but this
+    version is optimized for speed. The optimization works by using the
+    value of the previous window as a base to compute the next one.
+    """
+    l2 = math.log(2)
+    tamseq = len(seq)
+    upper = str(seq).upper()
+    compone = [0]
+    lccsal = [0]
+    for i in range(wsize):
+        compone.append(
+            ((i + 1) / float(wsize)) * ((math.log((i + 1) / float(wsize))) / l2)
+        )
+    window = seq[0:wsize]
+    cant_a = window.count("A")
+    cant_c = window.count("C")
+    cant_t = window.count("T")
+    cant_g = window.count("G")
+    term_a = compone[cant_a]
+    term_c = compone[cant_c]
+    term_t = compone[cant_t]
+    term_g = compone[cant_g]
+    lccsal.append(-(term_a + term_c + term_t + term_g))
+    tail = seq[0]
+    for x in range(tamseq - wsize):
+        window = upper[x + 1 : wsize + x + 1]
+        if tail == window[-1]:
+            lccsal.append(lccsal[-1])
+        elif tail == "A":
+            cant_a -= 1
+            if window.endswith("C"):
+                cant_c += 1
+                term_a = compone[cant_a]
+                term_c = compone[cant_c]
+                lccsal.append(-(term_a + term_c + term_t + term_g))
+            elif window.endswith("T"):
+                cant_t += 1
+                term_a = compone[cant_a]
+                term_t = compone[cant_t]
+                lccsal.append(-(term_a + term_c + term_t + term_g))
+            elif window.endswith("G"):
+                cant_g += 1
+                term_a = compone[cant_a]
+                term_g = compone[cant_g]
+                lccsal.append(-(term_a + term_c + term_t + term_g))
+        elif tail == "C":
+            cant_c -= 1
+            if window.endswith("A"):
+                cant_a += 1
+                term_a = compone[cant_a]
+                term_c = compone[cant_c]
+                lccsal.append(-(term_a + term_c + term_t + term_g))
+            elif window.endswith("T"):
+                cant_t += 1
+                term_c = compone[cant_c]
+                term_t = compone[cant_t]
+                lccsal.append(-(term_a + term_c + term_t + term_g))
+            elif window.endswith("G"):
+                cant_g += 1
+                term_c = compone[cant_c]
+                term_g = compone[cant_g]
+                lccsal.append(-(term_a + term_c + term_t + term_g))
+        elif tail == "T":
+            cant_t -= 1
+            if window.endswith("A"):
+                cant_a += 1
+                term_a = compone[cant_a]
+                term_t = compone[cant_t]
+                lccsal.append(-(term_a + term_c + term_t + term_g))
+            elif window.endswith("C"):
+                cant_c += 1
+                term_c = compone[cant_c]
+                term_t = compone[cant_t]
+                lccsal.append(-(term_a + term_c + term_t + term_g))
+            elif window.endswith("G"):
+                cant_g += 1
+                term_t = compone[cant_t]
+                term_g = compone[cant_g]
+                lccsal.append(-(term_a + term_c + term_t + term_g))
+        elif tail == "G":
+            cant_g -= 1
+            if window.endswith("A"):
+                cant_a += 1
+                term_a = compone[cant_a]
+                term_g = compone[cant_g]
+                lccsal.append(-(term_a + term_c + term_t + term_g))
+            elif window.endswith("C"):
+                cant_c += 1
+                term_c = compone[cant_c]
+                term_g = compone[cant_g]
+                lccsal.append(-(term_a + term_c + term_t + term_g))
+            elif window.endswith("T"):
+                cant_t += 1
+                term_t = compone[cant_t]
+                term_g = compone[cant_g]
+                lccsal.append(-(term_a + term_c + term_t + term_g))
+        tail = window[0]
+    return lccsal
+
+
+def lcc_simp(seq):
+    """Calculate Local Composition Complexity (LCC) for a sequence.
+
+    seq - an unambiguous DNA sequence (a string or Seq object)
+
+    Returns the Local Composition Complexity (LCC) value for the entire
+    sequence (as a float).
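+
+    A worked example: in "ACGT" each base has frequency 0.25, so the
+    complexity is -(4 * 0.25 * log2(0.25)) = 2.0:
+
+    >>> from Bio.SeqUtils.lcc import lcc_simp
+    >>> lcc_simp("ACGT")
+    2.0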
+
+    Reference:
+    Andrzej K Konopka (2005) Sequence Complexity and Composition
+    https://doi.org/10.1038/npg.els.0005260
+    """
+    wsize = len(seq)
+    upper = str(seq).upper()
+    l2 = math.log(2)
+    if "A" not in seq:
+        term_a = 0
+        # Check to avoid calculating the log of 0.
+    else:
+        term_a = ((upper.count("A")) / float(wsize)) * (
+            (math.log((upper.count("A")) / float(wsize))) / l2
+        )
+    if "C" not in seq:
+        term_c = 0
+    else:
+        term_c = ((upper.count("C")) / float(wsize)) * (
+            (math.log((upper.count("C")) / float(wsize))) / l2
+        )
+    if "T" not in seq:
+        term_t = 0
+    else:
+        term_t = ((upper.count("T")) / float(wsize)) * (
+            (math.log((upper.count("T")) / float(wsize))) / l2
+        )
+    if "G" not in seq:
+        term_g = 0
+    else:
+        term_g = ((upper.count("G")) / float(wsize)) * (
+            (math.log((upper.count("G")) / float(wsize))) / l2
+        )
+    return -(term_a + term_c + term_t + term_g)
diff --git a/code/lib/Bio/Sequencing/Ace.py b/code/lib/Bio/Sequencing/Ace.py
new file mode 100644
index 0000000..5b446b4
--- /dev/null
+++ b/code/lib/Bio/Sequencing/Ace.py
@@ -0,0 +1,594 @@
+# Copyright 2004 by Frank Kauff and Cymon J. Cox.  All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Parser for ACE files output by PHRAP.
+
+Written by Frank Kauff (fkauff@duke.edu) and
+Cymon J. Cox (cymon@duke.edu)
+
+Usage:
+
+There are two ways of reading an ace file:
+
+1. The function 'read' reads the whole file at once;
+2. The function 'parse' reads the file contig after contig.
+
+First option, parse the whole ace file at once::
+
+        from Bio.Sequencing import Ace
+        acefilerecord = Ace.read(open('my_ace_file.ace'))
+
+This gives you:
+ - acefilerecord.ncontigs (the number of contigs in the ace file)
+ - acefilerecord.nreads (the number of reads in the ace file)
+ - acefilerecord.contigs[] (one instance of the Contig class for each contig)
+
+The Contig class holds the info of the CO tag, CT and WA tags, and all the reads used
+for this contig in a list of instances of the Read class, e.g.::
+
+        contig3 = acefilerecord.contigs[2]
+        read4 = contig3.reads[3]
+        RD_of_read4 = read4.rd
+        DS_of_read4 = read4.ds
+
+CT, WA, RT tags from the end of the file can appear anywhere and are
+automatically sorted into the right place.
+
+See _RecordConsumer for details.
+
+The second option is to iterate over the contigs of an ace file one by one
+in the usual way::
+
+    from Bio.Sequencing import Ace
+    contigs = Ace.parse(open('my_ace_file.ace'))
+    for contig in contigs:
+        print(contig.name)
+        ...
+
+Please note that for memory efficiency, when using the iterator approach, only one
+contig is kept in memory at once.  However, there can be a footer to the ACE file
+containing WA, CT, RT or WR tags which contain additional meta-data on the contigs.
+Because the parser doesn't see this data until the final record, it cannot be added to
+the appropriate records.  Instead these tags will be returned with the last contig record.
+Thus an ace file does not entirely suit the concept of iterating. If WA, CT, RT, WR tags
+are needed, the 'read' function rather than the 'parse' function might be more appropriate.
+"""
+
+
+class rd:
+    """RD (reads), store a read with its name, sequence etc.
+
+    The location and strand each read is mapped to is held in the AF lines.
+    """
+
+    def __init__(self):
+        """Initialize the class."""
+        self.name = ""
+        self.padded_bases = None
+        self.info_items = None
+        self.read_tags = None
+        self.sequence = ""
+
+
+class qa:
+    """QA (read quality), including which part if any was used as the consensus."""
+
+    def __init__(self, line=None):
+        """Initialize the class."""
+        self.qual_clipping_start = None
+        self.qual_clipping_end = None
+        self.align_clipping_start = None
+        self.align_clipping_end = None
+        if line:
+            header = line.split()
+            self.qual_clipping_start = int(header[1])
+            self.qual_clipping_end = int(header[2])
+            self.align_clipping_start = int(header[3])
+            self.align_clipping_end = int(header[4])
+
+
+class ds:
+    """DS lines, include file name of a read's chromatogram file."""
+
+    def __init__(self, line=None):
+        """Initialize the class."""
+        self.chromat_file = ""
+        self.phd_file = ""
+        self.time = ""
+        self.chem = ""
+        self.dye = ""
+        self.template = ""
+        self.direction = ""
+        if line:
+            tags = [
+                "CHROMAT_FILE",
+                "PHD_FILE",
+                "TIME",
+                "CHEM",
+                "DYE",
+                "TEMPLATE",
+                "DIRECTION",
+            ]
+            poss = [line.find(x) for x in tags]
+            tagpos = dict(zip(poss, tags))
+            if -1 in tagpos:
+                del tagpos[-1]
+            ps = sorted(tagpos)  # the keys
+            for (p1, p2) in zip(ps, ps[1:] + [len(line) + 1]):
+                setattr(
+                    self,
+                    tagpos[p1].lower(),
+                    line[p1 + len(tagpos[p1]) + 1 : p2].strip(),
+                )
+
+
+class af:
+    """AF lines, define the location of the read within the contig.
+
+    Note attribute coru is short for complemented (C) or uncomplemented (U),
+    since the strand information is stored in an ACE file using either the
+    C or U character.
+    """
+
+    def __init__(self, line=None):
+        """Initialize the class."""
+        self.name = ""
+        self.coru = None
+        self.padded_start = None
+        if line:
+            header = line.split()
+            self.name = header[1]
+            self.coru = header[2]
+            self.padded_start = int(header[3])
+
+
+class bs:
+    """BS (base segment), which read was chosen as the consensus at each position."""
+
+    def __init__(self, line=None):
+        """Initialize the class."""
+        self.name = ""
+        self.padded_start = None
+        self.padded_end = None
+        if line:
+            header = line.split()
+            self.padded_start = int(header[1])
+            self.padded_end = int(header[2])
+            self.name = header[3]
+
+
+class rt:
+    """RT (transient read tags), generated by crossmatch and phrap."""
+
+    def __init__(self, line=None):
+        """Initialize the class."""
+        self.name = ""
+        self.tag_type = ""
+        self.program = ""
+        self.padded_start = None
+        self.padded_end = None
+        self.date = ""
+        self.comment = []
+        if line:
+            header = line.split()
+            self.name = header[0]
+            self.tag_type = header[1]
+            self.program = header[2]
+            self.padded_start = int(header[3])
+            self.padded_end = int(header[4])
+            self.date = header[5]
+
+
+class ct:
+    """CT (consensus tags)."""
+
+    def __init__(self, line=None):
+        """Initialize the class."""
+        self.name = ""
+        self.tag_type = ""
+        self.program = ""
+        self.padded_start = None
+        self.padded_end = None
+        self.date = ""
+        self.notrans = ""
+        self.info = []
+        self.comment = []
+        if line:
+            header = line.split()
+            self.name = header[0]
+            self.tag_type = header[1]
+            self.program = header[2]
+            self.padded_start = int(header[3])
+            self.padded_end = int(header[4])
+            self.date = header[5]
+            if len(header) == 7:
+                self.notrans = header[6]
+
+
+class wa:
+    """WA (whole assembly tag), holds the assembly program name, version, etc."""
+
+    def __init__(self, line=None):
+        """Initialize the class."""
+        self.tag_type = ""
+        self.program = ""
+        self.date = ""
+        self.info = []
+        if line:
+            header = line.split()
+            self.tag_type = header[0]
+            self.program = header[1]
+            self.date = header[2]
+
+
+class wr:
+    """WR lines."""
+
+    def __init__(self, line=None):
+        """Initialize the class."""
+        self.name = ""
+        self.aligned = ""
+        self.program = ""
+        self.date = ""
+        if line:
+            header = line.split()
+            self.name = header[0]
+            self.aligned = header[1]
+            self.program = header[2]
+            self.date = header[3]
+
+
+class Reads:
+    """Holds information about a read supporting an ACE contig."""
+
+    def __init__(self, line=None):
+        """Initialize the class."""
+        self.rd = None  # one per read
+        self.qa = None  # one per read
+        self.ds = None  # none or one per read
+        self.rt = None  # none or many per read
+        self.wr = None  # none or many per read
+        if line:
+            self.rd = rd()
+            header = line.split()
+            self.rd.name = header[1]
+            self.rd.padded_bases = int(header[2])
+            self.rd.info_items = int(header[3])
+            self.rd.read_tags = int(header[4])
+
+
+class Contig:
+    """Holds information about a contig from an ACE record."""
+
+    def __init__(self, line=None):
+        """Initialize the class."""
+        self.name = ""
+        self.nbases = None
+        self.nreads = None
+        self.nsegments = None
+        self.uorc = None
+        self.sequence = ""
+        self.quality = []
+        self.af = []
+        self.bs = []
+        self.reads = []
+        self.ct = None  # none or many
+        self.wa = None  # none or many
+        if line:
+            header = line.split()
+            self.name = header[1]
+            self.nbases = int(header[2])
+            self.nreads = int(header[3])
+            self.nsegments = int(header[4])
+            self.uorc = header[5]
+
+
+def parse(source):
+    """Iterate of ACE file contig by contig.
+
+    Argument source is a file-like object or a path to a file.
+
+    This function returns an iterator that allows you to iterate
+    over the ACE file record by record::
+
+        records = parse(source)
+        for record in records:
+            # do something with the record
+
+    where each record is a Contig object.
+    """
+    try:
+        handle = open(source)
+    except TypeError:
+        handle = source
+        if handle.read(0) != "":
+            raise ValueError("Ace files must be opened in text mode.") from None
+
+    try:
+        line = ""
+        while True:
+            # at beginning, skip the AS and look for first CO command
+            try:
+                while True:
+                    if line.startswith("CO"):
+                        break
+                    line = next(handle)
+            except StopIteration:
+                return
+
+            record = Contig(line)
+
+            for line in handle:
+                line = line.strip()
+                if not line:
+                    break
+                record.sequence += line
+
+            for line in handle:
+                if line.strip():
+                    break
+            if not line.startswith("BQ"):
+                raise ValueError("Failed to find BQ line")
+
+            for line in handle:
+                if not line.strip():
+                    break
+                record.quality.extend(int(x) for x in line.split())
+
+            for line in handle:
+                if line.strip():
+                    break
+
+            while True:
+                if not line.startswith("AF "):
+                    break
+                record.af.append(af(line))
+                try:
+                    line = next(handle)
+                except StopIteration:
+                    raise ValueError("Unexpected end of AF block") from None
+
+            while True:
+                if line.strip():
+                    break
+                try:
+                    line = next(handle)
+                except StopIteration:
+                    raise ValueError("Unexpected end of file") from None
+
+            while True:
+                if not line.startswith("BS "):
+                    break
+                record.bs.append(bs(line))
+                try:
+                    line = next(handle)
+                except StopIteration:
+                    raise ValueError("Failed to find end of BS block") from None
+
+            # now read all the read data
+            # it starts with an 'RD' line, followed by a mandatory QA,
+            # and then an optional DS
+            # CT,RT,WA,WR may or may not be there in unlimited quantity.
+            # They might refer to the actual read or contig, or, if
+            # encountered at the end of file, to any previous read or contig.
+            # The sort() method deals with that later.
+            while True:
+
+                # each read must have a rd and qa
+                try:
+                    while True:
+                        # Stop once we reach the next RD line.
+                        if line.startswith("RD "):
+                            break
+                        line = next(handle)
+                except StopIteration:
+                    raise ValueError("Failed to find RD line") from None
+
+                record.reads.append(Reads(line))
+
+                for line in handle:
+                    line = line.strip()
+                    if not line:
+                        break
+                    record.reads[-1].rd.sequence += line
+
+                for line in handle:
+                    if line.strip():
+                        break
+                if not line.startswith("QA "):
+                    raise ValueError("Failed to find QA line")
+                record.reads[-1].qa = qa(line)
+
+                # now one ds can follow
+                for line in handle:
+                    if line.strip():
+                        break
+                else:
+                    break
+
+                if line.startswith("DS "):
+                    record.reads[-1].ds = ds(line)
+                    line = ""
+                # the file could just end, or there's some more stuff.
+                # In ace files, anything can happen.
+                # the following tags are interspersed between reads and can appear multiple times.
+                while True:
+                    # something left
+                    try:
+                        while True:
+                            if line.strip():
+                                break
+                            line = next(handle)
+                    except StopIteration:
+                        # file ends here
+                        break
+                    if line.startswith("RT{"):
+                        # now if we're at the end of the file, this rt could
+                        # belong to a previous read, not the actual one.
+                        # we store it here where it appears; the user can sort later.
+                        if record.reads[-1].rt is None:
+                            record.reads[-1].rt = []
+                        for line in handle:
+                            line = line.strip()
+                            # if line=="COMMENT{":
+                            if line.startswith("COMMENT{"):
+                                if line[8:].strip():
+                                    # MIRA 3.0.5 would miss the new line out :(
+                                    record.reads[-1].rt[-1].comment.append(line[8:])
+                                for line in handle:
+                                    line = line.strip()
+                                    if line.endswith("C}"):
+                                        break
+                                    record.reads[-1].rt[-1].comment.append(line)
+                            elif line == "}":
+                                break
+                            else:
+                                record.reads[-1].rt.append(rt(line))
+                        line = ""
+                    elif line.startswith("WR{"):
+                        if record.reads[-1].wr is None:
+                            record.reads[-1].wr = []
+                        for line in handle:
+                            line = line.strip()
+                            if line == "}":
+                                break
+                            record.reads[-1].wr.append(wr(line))
+                        line = ""
+                    elif line.startswith("WA{"):
+                        if record.wa is None:
+                            record.wa = []
+                        try:
+                            line = next(handle)
+                        except StopIteration:
+                            raise ValueError("Failed to read WA block") from None
+                        record.wa.append(wa(line))
+                        for line in handle:
+                            line = line.strip()
+                            if line == "}":
+                                break
+                            record.wa[-1].info.append(line)
+                        line = ""
+                    elif line.startswith("CT{"):
+                        if record.ct is None:
+                            record.ct = []
+                        try:
+                            line = next(handle)
+                        except StopIteration:
+                            raise ValueError("Failed to read CT block") from None
+                        record.ct.append(ct(line))
+                        for line in handle:
+                            line = line.strip()
+                            if line == "COMMENT{":
+                                for line in handle:
+                                    line = line.strip()
+                                    if line.endswith("C}"):
+                                        break
+                                    record.ct[-1].comment.append(line)
+                            elif line == "}":
+                                break
+                            else:
+                                record.ct[-1].info.append(line)
+                        line = ""
+                    else:
+                        break
+
+                if not line.startswith("RD"):  # another read?
+                    break
+
+            yield record
+
+    finally:
+        if handle is not source:
+            handle.close()
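+
+# Usage sketch for parse() ("assembly.ace" is a placeholder file name):
+#
+#     with open("assembly.ace") as handle:
+#         for contig in parse(handle):
+#             print(contig.name, contig.nbases, contig.nreads)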
+
+
+class ACEFileRecord:
+    """Holds data of an ACE file."""
+
+    def __init__(self):
+        """Initialize the class."""
+        self.ncontigs = None
+        self.nreads = None
+        self.contigs = []
+        self.wa = None  # none or many
+
+    def sort(self):
+        """Sorts wr, rt and ct tags into the appropriate contig / read instance, if possible."""
+        ct = []
+        rt = []
+        wr = []
+        # search for tags that aren't in the right position
+        for i, c in enumerate(self.contigs):
+            if c.wa:
+                if not self.wa:
+                    self.wa = []
+                self.wa.extend(c.wa)
+            if c.ct:
+                newcts = [ct_tag for ct_tag in c.ct if ct_tag.name != c.name]
+                for x in newcts:
+                    self.contigs[i].ct.remove(x)
+                ct.extend(newcts)
+            for j, r in enumerate(c.reads):
+                if r.rt:
+                    newrts = [rt_tag for rt_tag in r.rt if rt_tag.name != r.rd.name]
+                    for x in newrts:
+                        self.contigs[i].reads[j].rt.remove(x)
+                    rt.extend(newrts)
+                if r.wr:
+                    newwrs = [wr_tag for wr_tag in r.wr if wr_tag.name != r.rd.name]
+                    for x in newwrs:
+                        self.contigs[i].reads[j].wr.remove(x)
+                    wr.extend(newwrs)
+        # now sort them into their proper place
+        for i, c in enumerate(self.contigs):
+            for ct_tag in ct:
+                if ct_tag.name == c.name:
+                    if self.contigs[i].ct is None:
+                        self.contigs[i].ct = []
+                    self.contigs[i].ct.append(ct_tag)
+            if rt or wr:
+                for j, r in enumerate(c.reads):
+                    for rt_tag in rt:
+                        if rt_tag.name == r.rd.name:
+                            if self.contigs[i].reads[j].rt is None:
+                                self.contigs[i].reads[j].rt = []
+                            self.contigs[i].reads[j].rt.append(rt_tag)
+                    for wr_tag in wr:
+                        if wr_tag.name == r.rd.name:
+                            if self.contigs[i].reads[j].wr is None:
+                                self.contigs[i].reads[j].wr = []
+                            self.contigs[i].reads[j].wr.append(wr_tag)
+
+
+def read(handle):
+    """Parse a full ACE file into a list of contigs."""
+    handle = iter(handle)
+
+    record = ACEFileRecord()
+
+    try:
+        line = next(handle)
+    except StopIteration:
+        raise ValueError("Premature end of file") from None
+
+    # check if the file starts correctly
+    if not line.startswith("AS"):
+        raise ValueError("File does not start with 'AS'.")
+
+    words = line.split()
+    record.ncontigs = int(words[1])
+    record.nreads = int(words[2])
+
+    # now read all the records
+    record.contigs = list(parse(handle))
+    # wa, ct, rt tags are usually at the end of the file, but not necessarily.
+    # If the iterator is used, the tags are returned with the contig or the read
+    # after which they appear; if all tags are at the end, they are read with the
+    # last contig. The concept of an iterator leaves no other choice. But if the
+    # user calls read(), we can check them and put them into the appropriate
+    # contig/read instance.
+    # Conclusion: an ACE file is not a file type for which iteration is 100% suitable...
+    record.sort()
+    return record
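+
+
+# Usage sketch for read() ("assembly.ace" is a placeholder; unlike parse(),
+# read() loads the whole file and calls sort(), so stray wr/rt/ct tags are
+# attached to the contig or read they name):
+#
+#     with open("assembly.ace") as handle:
+#         record = read(handle)
+#     print(record.ncontigs, record.nreads, len(record.contigs))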
diff --git a/code/lib/Bio/Sequencing/Applications/_Novoalign.py b/code/lib/Bio/Sequencing/Applications/_Novoalign.py
new file mode 100644
index 0000000..97a0a44
--- /dev/null
+++ b/code/lib/Bio/Sequencing/Applications/_Novoalign.py
@@ -0,0 +1,217 @@
+# Copyright 2009 by Osvaldo Zagordi.  All rights reserved.
+# Revisions copyright 2010 by Peter Cock.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Command line wrapper for the short read aligner Novoalign by Novocraft."""
+
+
+from Bio.Application import _Option, AbstractCommandline
+
+
+class NovoalignCommandline(AbstractCommandline):
+    """Command line wrapper for novoalign by Novocraft.
+
+    See www.novocraft.com - novoalign is a short read alignment program.
+
+    Examples
+    --------
+    >>> from Bio.Sequencing.Applications import NovoalignCommandline
+    >>> novoalign_cline = NovoalignCommandline(database='some_db',
+    ...                                        readfile='some_seq.txt')
+    >>> print(novoalign_cline)
+    novoalign -d some_db -f some_seq.txt
+
+    As with all the Biopython application wrappers, you can also add or
+    change options after creating the object:
+
+    >>> novoalign_cline.format = 'PRBnSEQ'
+    >>> novoalign_cline.r_method='0.99' # limited valid values
+    >>> novoalign_cline.fragment = '250 20' # must be given as a string
+    >>> novoalign_cline.miRNA = 100
+    >>> print(novoalign_cline)
+    novoalign -d some_db -f some_seq.txt -F PRBnSEQ -r 0.99 -i 250 20 -m 100
+
+    You would typically run the command line with novoalign_cline() or via
+    the Python subprocess module, as described in the Biopython tutorial.
+
+    Last checked against version: 2.05.04
+
+    """
+
+    def __init__(self, cmd="novoalign", **kwargs):
+        """Initialize the class."""
+        READ_FORMAT = ["FA", "SLXFQ", "STDFQ", "ILMFQ", "PRB", "PRBnSEQ"]
+        REPORT_FORMAT = ["Native", "Pairwise", "SAM"]
+        REPEAT_METHOD = ["None", "Random", "All", "Exhaustive", "0.99"]
+
+        self.parameters = [
+            _Option(
+                ["-d", "database"], "database filename", filename=True, equate=False
+            ),
+            _Option(["-f", "readfile"], "read file", filename=True, equate=False),
+            _Option(
+                ["-F", "format"],
+                "Format of read files.\n\nAllowed values: %s" % ", ".join(READ_FORMAT),
+                checker_function=lambda x: x in READ_FORMAT,
+                equate=False,
+            ),
+            # Alignment scoring options
+            _Option(
+                ["-t", "threshold"],
+                "Threshold for alignment score",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Option(
+                ["-g", "gap_open"],
+                "Gap opening penalty [default: 40]",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Option(
+                ["-x", "gap_extend"],
+                "Gap extend penalty [default: 15]",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Option(
+                ["-u", "unconverted"],
+                "Experimental: unconverted cytosines penalty in bisulfite mode\n\n"
+                "Default: no penalty",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            # Quality control and read filtering
+            _Option(
+                ["-l", "good_bases"],
+                "Minimum number of good quality bases [default: log(N_g, 4) + 5]",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Option(
+                ["-h", "homopolymer"],
+                "Homopolymer read filter [default: 20; disable: negative value]",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            # Read preprocessing options
+            _Option(
+                ["-a", "adapter3"],
+                "Strips a 3' adapter sequence prior to alignment.\n\n"
+                "With paired ends two adapters can be specified",
+                checker_function=lambda x: isinstance(x, str),
+                equate=False,
+            ),
+            _Option(
+                ["-n", "truncate"],
+                "Truncate to specific length before alignment",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Option(
+                ["-s", "trimming"],
+                "If fail to align, trim by s bases until they map or become shorter than l.\n\n"
+                "Ddefault: 2",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Option(
+                ["-5", "adapter5"],
+                "Strips a 5' adapter sequence.\n\n"
+                "Similar to -a (adaptor3), but on the 5' end.",
+                checker_function=lambda x: isinstance(x, str),
+                equate=False,
+            ),
+            # Reporting options
+            _Option(
+                ["-o", "report"],
+                "Specifies the report format.\n\nAllowed values: %s\nDefault: Native"
+                % ", ".join(REPORT_FORMAT),
+                checker_function=lambda x: x in REPORT_FORMAT,
+                equate=False,
+            ),
+            _Option(
+                ["-Q", "quality"],
+                "Lower threshold for an alignment to be reported [default: 0]",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Option(
+                ["-R", "repeats"],
+                "If score difference is higher, report repeats.\n\n"
+                "Otherwise -r read method applies [default: 5]",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Option(
+                ["-r", "r_method"],
+                "Methods to report reads with multiple matches.\n\n"
+                "Allowed values: %s\n"
+                "'All' and 'Exhaustive' accept limits." % ", ".join(REPEAT_METHOD),
+                checker_function=lambda x: x.split()[0] in REPEAT_METHOD,
+                equate=False,
+            ),
+            _Option(
+                ["-e", "recorded"],
+                "Alignments recorded with score equal to the best.\n\n"
+                "Default: 1000 in default read method, otherwise no limit.",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Option(
+                ["-q", "qual_digits"],
+                "Decimal digits for quality scores [default: 0]",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            # Paired end options
+            _Option(
+                ["-i", "fragment"],
+                "Fragment length (2 reads + insert) and standard deviation [default: 250 30]",
+                checker_function=lambda x: len(x.split()) == 2,
+                equate=False,
+            ),
+            _Option(
+                ["-v", "variation"],
+                "Structural variation penalty [default: 70]",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            # miRNA mode
+            _Option(
+                ["-m", "miRNA"],
+                "Sets miRNA mode and optionally sets a value for the region scanned [default: off]",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            # Multithreading
+            _Option(
+                ["-c", "cores"],
+                "Number of threads, disabled on free versions [default: number of cores]",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            # Quality calibrations
+            _Option(
+                ["-k", "read_cal"],
+                "Read quality calibration from file (mismatch counts)",
+                checker_function=lambda x: isinstance(x, str),
+                equate=False,
+            ),
+            _Option(
+                ["-K", "write_cal"],
+                "Accumulate mismatch counts and write to file",
+                checker_function=lambda x: isinstance(x, str),
+                equate=False,
+            ),
+        ]
+        AbstractCommandline.__init__(self, cmd, **kwargs)
+
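+# Sketch of actually running the wrapper (assumes the novoalign binary is on
+# PATH and that "some_db" / "some_seq.txt" exist; both names are placeholders):
+#
+#     import subprocess
+#     cline = NovoalignCommandline(database="some_db", readfile="some_seq.txt")
+#     result = subprocess.run(str(cline), shell=True,
+#                             capture_output=True, text=True)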
+
+if __name__ == "__main__":
+    from Bio._utils import run_doctest
+
+    run_doctest()
diff --git a/code/lib/Bio/Sequencing/Applications/__init__.py b/code/lib/Bio/Sequencing/Applications/__init__.py
new file mode 100644
index 0000000..e53d906
--- /dev/null
+++ b/code/lib/Bio/Sequencing/Applications/__init__.py
@@ -0,0 +1,56 @@
+# Copyright 2009 by Osvaldo Zagordi.  All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+
+"""Sequencing related command line application wrappers (OBSOLETE).
+
+We have decided to remove this module in future, and instead recommend
+building your command and invoking it via the subprocess module directly.
+"""
+
+from ._Novoalign import NovoalignCommandline
+from ._bwa import BwaIndexCommandline, BwaAlignCommandline, BwaSamseCommandline
+from ._bwa import BwaSampeCommandline, BwaBwaswCommandline, BwaMemCommandline
+from ._samtools import SamtoolsViewCommandline, SamtoolsCalmdCommandline
+from ._samtools import SamtoolsCatCommandline, SamtoolsFaidxCommandline
+from ._samtools import SamtoolsFixmateCommandline, SamtoolsIdxstatsCommandline
+from ._samtools import SamtoolsIndexCommandline, SamtoolsMergeCommandline
+from ._samtools import SamtoolsMpileupCommandline, SamtoolsPhaseCommandline
+from ._samtools import SamtoolsReheaderCommandline, SamtoolsRmdupCommandline
+from ._samtools import (
+    SamtoolsVersion0xSortCommandline,
+    SamtoolsVersion1xSortCommandline,
+    SamtoolsTargetcutCommandline,
+)
+from ._samtools import SamtoolsVersion0xSortCommandline as SamtoolsSortCommandline
+
+
+# Make this explicit, then they show up in the API docs
+__all__ = (
+    "BwaIndexCommandline",
+    "BwaAlignCommandline",
+    "BwaSamseCommandline",
+    "BwaSampeCommandline",
+    "BwaBwaswCommandline",
+    "BwaMemCommandline",
+    "NovoalignCommandline",
+    "SamtoolsViewCommandline",
+    "SamtoolsCalmdCommandline",
+    "SamtoolsCatCommandline",
+    "SamtoolsFaidxCommandline",
+    "SamtoolsFixmateCommandline",
+    "SamtoolsIdxstatsCommandline",
+    "SamtoolsIndexCommandline",
+    "SamtoolsMergeCommandline",
+    "SamtoolsMpileupCommandline",
+    "SamtoolsPhaseCommandline",
+    "SamtoolsReheaderCommandline",
+    "SamtoolsRmdupCommandline",
+    "SamtoolsSortCommandline",
+    "SamtoolsVersion0xSortCommandline",
+    "SamtoolsVersion1xSortCommandline",
+    "SamtoolsTargetcutCommandline",
+)
diff --git a/code/lib/Bio/Sequencing/Applications/__pycache__/_Novoalign.cpython-37.pyc b/code/lib/Bio/Sequencing/Applications/__pycache__/_Novoalign.cpython-37.pyc
new file mode 100644
index 0000000..23aa246
Binary files /dev/null and b/code/lib/Bio/Sequencing/Applications/__pycache__/_Novoalign.cpython-37.pyc differ
diff --git a/code/lib/Bio/Sequencing/Applications/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/Sequencing/Applications/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000..8a77d3b
Binary files /dev/null and b/code/lib/Bio/Sequencing/Applications/__pycache__/__init__.cpython-37.pyc differ
diff --git a/code/lib/Bio/Sequencing/Applications/__pycache__/_bwa.cpython-37.pyc b/code/lib/Bio/Sequencing/Applications/__pycache__/_bwa.cpython-37.pyc
new file mode 100644
index 0000000..744a837
Binary files /dev/null and b/code/lib/Bio/Sequencing/Applications/__pycache__/_bwa.cpython-37.pyc differ
diff --git a/code/lib/Bio/Sequencing/Applications/__pycache__/_samtools.cpython-37.pyc b/code/lib/Bio/Sequencing/Applications/__pycache__/_samtools.cpython-37.pyc
new file mode 100644
index 0000000..4563761
Binary files /dev/null and b/code/lib/Bio/Sequencing/Applications/__pycache__/_samtools.cpython-37.pyc differ
diff --git a/code/lib/Bio/Sequencing/Applications/_bwa.py b/code/lib/Bio/Sequencing/Applications/_bwa.py
new file mode 100644
index 0000000..f07ec6a
--- /dev/null
+++ b/code/lib/Bio/Sequencing/Applications/_bwa.py
@@ -0,0 +1,640 @@
+# Copyright 2013 Saket Choudhary.  All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Command line wrapper for bwa."""
+
+from Bio.Application import _Option, _Argument, _Switch, AbstractCommandline
+from Bio.Application import _StaticArgument
+
+
+class BwaIndexCommandline(AbstractCommandline):
+    """Command line wrapper for Burrows Wheeler Aligner (BWA) index.
+
+    Index database sequences in the FASTA format, equivalent to::
+
+        $ bwa index [-p prefix] [-a algoType] [-c] <in.fasta>
+
+    See http://bio-bwa.sourceforge.net/bwa.shtml for details.
+
+    Examples
+    --------
+    >>> from Bio.Sequencing.Applications import BwaIndexCommandline
+    >>> reference_genome = "/path/to/reference_genome.fasta"
+    >>> index_cmd = BwaIndexCommandline(infile=reference_genome, algorithm="bwtsw")
+    >>> print(index_cmd)
+    bwa index -a bwtsw /path/to/reference_genome.fasta
+
+    You would typically run the command using index_cmd() or via the
+    Python subprocess module, as described in the Biopython tutorial.
+
+    """
+
+    def __init__(self, cmd="bwa", **kwargs):
+        """Initialize the class."""
+        self.program_name = cmd
+        self.parameters = [
+            _StaticArgument("index"),
+            _Option(
+                ["-a", "a", "algorithm"],
+                """Algorithm for constructing BWT index.
+
+                    Available options are:
+                        - is:    IS linear-time algorithm for constructing suffix array.
+                          It requires 5.37N memory where N is the size of the database.
+                          IS is moderately fast, but does not work with database larger
+                          than 2GB. IS is the default algorithm due to its simplicity.
+                        - bwtsw: Algorithm implemented in BWT-SW. This method works with the
+                          whole human genome, but it does not work with database
+                          smaller than 10MB and it is usually slower than IS.""",
+                checker_function=lambda x: x in ["is", "bwtsw"],
+                equate=False,
+                is_required=True,
+            ),
+            _Option(
+                ["-p", "p", "prefix"],
+                "Prefix of the output database [same as db filename]",
+                equate=False,
+                is_required=False,
+            ),
+            _Argument(["infile"], "Input file name", filename=True, is_required=True),
+            _Switch(
+                ["-c", "c"],
+                "Build color-space index. The input fasta should be in nucleotide space.",
+            ),
+        ]
+        AbstractCommandline.__init__(self, cmd, **kwargs)
+
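+# Sketch of executing the index command (assumes bwa is on PATH; "ref.fasta"
+# is a placeholder; calling the wrapper returns its stdout and stderr):
+#
+#     index_cmd = BwaIndexCommandline(infile="ref.fasta", algorithm="is")
+#     stdout, stderr = index_cmd()  # runs: bwa index -a is ref.fasta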
+
+class BwaAlignCommandline(AbstractCommandline):
+    """Command line wrapper for Burrows Wheeler Aligner (BWA) aln.
+
+    Run a BWA alignment, equivalent to::
+
+        $ bwa aln [...] <in.db.fasta> <in.query.fq> > <out.sai>
+
+    See http://bio-bwa.sourceforge.net/bwa.shtml for details.
+
+    Examples
+    --------
+    >>> from Bio.Sequencing.Applications import BwaAlignCommandline
+    >>> reference_genome = "/path/to/reference_genome.fasta"
+    >>> read_file = "/path/to/read_1.fq"
+    >>> output_sai_file = "/path/to/read_1.sai"
+    >>> align_cmd = BwaAlignCommandline(reference=reference_genome, read_file=read_file)
+    >>> print(align_cmd)
+    bwa aln /path/to/reference_genome.fasta /path/to/read_1.fq
+
+    You would typically run the command line using align_cmd(stdout=output_sai_file)
+    or via the Python subprocess module, as described in the Biopython tutorial.
+
+    """
+
+    def __init__(self, cmd="bwa", **kwargs):
+        """Initialize the class."""
+        self.program_name = cmd
+        self.parameters = [
+            _StaticArgument("aln"),
+            _Argument(
+                ["reference"], "Reference file name", filename=True, is_required=True
+            ),
+            _Argument(["read_file"], "Read file name", filename=True, is_required=True),
+            _Option(
+                ["-n", "n"],
+                "Maximum edit distance if the value is INT, or the fraction of missing alignments given 2% uniform base error rate if FLOAT. In the latter case, the maximum edit distance is automatically chosen for different read lengths. [0.04]",
+                checker_function=lambda x: isinstance(x, (int, float)),
+                equate=False,
+            ),
+            _Option(
+                ["-o", "o"],
+                "Maximum edit distance if the value is INT, or the fraction of missing alignments given 2% uniform base error rate if FLOAT. In the latter case, the maximum edit distance is automatically chosen for different read lengths. [0.04]",
+                checker_function=lambda x: isinstance(x, (int, float)),
+                equate=False,
+            ),
+            _Option(
+                ["-e", "e"],
+                "Maximum number of gap extensions, -1 for k-difference mode (disallowing long gaps) [-1]",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Option(
+                ["-d", "d"],
+                "Disallow a long deletion within INT bp towards the 3-end [16]",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Option(
+                ["-i", "i"],
+                "Disallow an indel within INT bp towards the ends [5]",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Option(
+                ["-l", "l"],
+                """Take the first INT subsequence as seed.
+
+                    If INT is larger than the query sequence, seeding will be disabled.
+                    For long reads, this option is typically ranged from 25 to 35 for
+                    -k 2. [inf]""",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Option(
+                ["-k", "k"],
+                "Maximum edit distance in the seed [2]",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Option(
+                ["-t", "t"],
+                "Number of threads (multi-threading mode) [1]",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Option(
+                ["-M", "M"],
+                "Mismatch penalty. BWA will not search for suboptimal hits with a score lower than (bestScore-misMsc). [3]",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Option(
+                ["-O", "O"],
+                "Gap open penalty [11]",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Option(
+                ["-E", "E"],
+                "Gap extension penalty [4]",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Option(
+                ["-R", "R"],
+                """Proceed with suboptimal alignments if there are no more than INT equally best hits.
+
+                    This option only affects paired-end mapping. Increasing this threshold helps
+                    to improve the pairing accuracy at the cost of speed, especially for short
+                    reads (~32bp).""",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Option(
+                ["-q", "q"],
+                r"""Parameter for read trimming [0].
+
+                    BWA trims a read down to argmax_x{\sum_{i=x+1}^l(INT-q_i)} if q_l<INT,
+                    where l is the original read length.""",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Option(
+                ["-B", "B"],
+                "Length of barcode starting from the 5'-end. When INT is positive, the barcode of each read will be trimmed before mapping and will be written at the BC SAM tag. For paired-end reads, the barcode from both ends are concatenated. [0]",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Switch(
+                ["-c", "c"],
+                "Reverse query but not complement it, which is required for alignment in the color space.",
+            ),
+            _Switch(
+                ["-N", "N"],
+                "Disable iterative search. All hits with no more than maxDiff differences will be found. This mode is much slower than the default.",
+            ),
+            _Switch(
+                ["-I", "I"],
+                "The input is in the Illumina 1.3+ read format (quality equals ASCII-64).",
+            ),
+            _Switch(
+                ["-b", "b"],
+                "Specify the input read sequence file is the BAM format",
+            ),
+            _Switch(
+                ["-0", "b0"],
+                "When -b is specified, only use single-end reads in mapping.",
+            ),
+            _Switch(
+                ["-1", "b1"],
+                "When -b is specified, only use the first read in a read pair in mapping (skip single-end reads and the second reads).",
+            ),
+            _Switch(
+                ["-2", "b2"],
+                "When -b is specified, only use the second read in a read pair in mapping.",
+            ),
+        ]
+        AbstractCommandline.__init__(self, cmd, **kwargs)
+
+
+class BwaSamseCommandline(AbstractCommandline):
+    """Command line wrapper for Burrows Wheeler Aligner (BWA) samse.
+
+    Generate alignments in the SAM format given single-end reads.
+    Equivalent to::
+
+        $ bwa samse [-n maxOcc] <in.db.fasta> <in.sai> <in.fq> > <out.sam>
+
+    See http://bio-bwa.sourceforge.net/bwa.shtml for details.
+
+    Examples
+    --------
+    >>> from Bio.Sequencing.Applications import BwaSamseCommandline
+    >>> reference_genome = "/path/to/reference_genome.fasta"
+    >>> read_file = "/path/to/read_1.fq"
+    >>> sai_file = "/path/to/read_1.sai"
+    >>> output_sam_file = "/path/to/read_1.sam"
+    >>> samse_cmd = BwaSamseCommandline(reference=reference_genome,
+    ...                                 read_file=read_file, sai_file=sai_file)
+    >>> print(samse_cmd)
+    bwa samse /path/to/reference_genome.fasta /path/to/read_1.sai /path/to/read_1.fq
+
+    You would typically run the command line using samse_cmd(stdout=output_sam_file)
+    or via the Python subprocess module, as described in the Biopython tutorial.
+
+    """
+
+    def __init__(self, cmd="bwa", **kwargs):
+        """Initialize the class."""
+        self.program_name = cmd
+        self.parameters = [
+            _StaticArgument("samse"),
+            _Argument(
+                ["reference"], "Reference file name", filename=True, is_required=True
+            ),
+            _Argument(["sai_file"], "Sai file name", filename=True, is_required=True),
+            _Argument(
+                ["read_file"], "Read  file name", filename=True, is_required=True
+            ),
+            _Option(
+                ["-n", "n"],
+                """Maximum number of alignments to output in the XA tag for reads paired properly.
+
+                    If a read has more than INT hits, the XA tag will not be written. [3]""",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Option(
+                ["-r", "r"],
+                "Specify the read group in a format like '@RG\tID:foo\tSM:bar'. [null]",
+                checker_function=lambda x: isinstance(x, str),
+                equate=False,
+            ),
+        ]
+        AbstractCommandline.__init__(self, cmd, **kwargs)
+
+
+class BwaSampeCommandline(AbstractCommandline):
+    r"""Command line wrapper for Burrows Wheeler Aligner (BWA) sampe.
+
+    Generate alignments in the SAM format given paired-end reads.
+    Equivalent to::
+
+        $ bwa sampe [...] <in.db.fasta> <in1.sai> <in2.sai> <in1.fq> <in2.fq> > <out.sam>
+
+    See http://bio-bwa.sourceforge.net/bwa.shtml for details.
+
+    Examples
+    --------
+    >>> from Bio.Sequencing.Applications import BwaSampeCommandline
+    >>> reference_genome = "/path/to/reference_genome.fasta"
+    >>> read_file1 = "/path/to/read_1.fq"
+    >>> read_file2 = "/path/to/read_2.fq"
+    >>> sai_file1 = "/path/to/read_1.sai"
+    >>> sai_file2 = "/path/to/read_2.sai"
+    >>> output_sam_file = "/path/to/output.sam"
+    >>> read_group = r"@RG\tID:foo\tSM:bar"  # BWA will turn backslash-t into tab
+    >>> sampe_cmd = BwaSampeCommandline(reference=reference_genome,
+    ...                                 sai_file1=sai_file1, sai_file2=sai_file2,
+    ...                                 read_file1=read_file1, read_file2=read_file2,
+    ...                                 r=read_group)
+    >>> print(sampe_cmd)
+    bwa sampe /path/to/reference_genome.fasta /path/to/read_1.sai /path/to/read_2.sai /path/to/read_1.fq /path/to/read_2.fq -r @RG\tID:foo\tSM:bar
+
+    You would typically run the command line using sampe_cmd(stdout=output_sam_file)
+    or via the Python subprocess module, as described in the Biopython tutorial.
+
+    """
+
+    # TODO - Should the read group have a raw tab in it, or \t?
+
+    def __init__(self, cmd="bwa", **kwargs):
+        """Initialize the class."""
+        self.program_name = cmd
+        self.parameters = [
+            _StaticArgument("sampe"),
+            _Argument(
+                ["reference"], "Reference file name", filename=True, is_required=True
+            ),
+            _Argument(["sai_file1"], "Sai file 1", filename=True, is_required=True),
+            _Argument(["sai_file2"], "Sai file 2", filename=True, is_required=True),
+            _Argument(["read_file1"], "Read  file 1", filename=True, is_required=True),
+            _Argument(["read_file2"], "Read  file 2", filename=True, is_required=True),
+            _Option(
+                ["-a", "a"],
+                """Maximum insert size for a read pair to be considered being mapped properly [500].
+
+                    Since 0.4.5, this option is only used when there are not enough
+                    good alignments to infer the distribution of insert sizes.""",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Option(
+                ["-o", "o"],
+                """Maximum occurrences of a read for pairing [100000].
+
+                        A read with more occurrences will be treated as a single-end read.
+                        Reducing this parameter helps faster pairing.""",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Option(
+                ["-n", "n"],
+                """Maximum number of alignments to output in the XA tag for reads paired properly [3].
+
+                    If a read has more than INT hits, the XA tag will not be written.""",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Option(
+                ["-N", "N"],
+                """Maximum number of alignments to output in the XA tag for disconcordant read pairs (excluding singletons) [10].
+
+                    If a read has more than INT hits, the XA tag will not be written.""",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Option(
+                ["-r", "r"],
+                "Specify the read group in a format like '@RG\tID:foo\tSM:bar'. [null]",
+                checker_function=lambda x: isinstance(x, str),
+                equate=False,
+            ),
+        ]
+        AbstractCommandline.__init__(self, cmd, **kwargs)
+
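+# Sketch of the usual aln -> sampe flow for paired-end data (all paths are
+# placeholders; each wrapper call redirects its stdout to a file):
+#
+#     aln1 = BwaAlignCommandline(reference="ref.fasta", read_file="r1.fq")
+#     aln1(stdout="r1.sai")
+#     aln2 = BwaAlignCommandline(reference="ref.fasta", read_file="r2.fq")
+#     aln2(stdout="r2.sai")
+#     sampe_cmd = BwaSampeCommandline(reference="ref.fasta",
+#                                     sai_file1="r1.sai", sai_file2="r2.sai",
+#                                     read_file1="r1.fq", read_file2="r2.fq")
+#     sampe_cmd(stdout="aln.sam")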
+
+class BwaBwaswCommandline(AbstractCommandline):
+    """Command line wrapper for Burrows Wheeler Aligner (BWA) bwasw.
+
+    Align query sequences from FASTQ files. Equivalent to::
+
+        $ bwa bwasw [...] <in.db.fasta> <in.fq> [mate.fq]
+
+    See http://bio-bwa.sourceforge.net/bwa.shtml for details.
+
+    Examples
+    --------
+    >>> from Bio.Sequencing.Applications import BwaBwaswCommandline
+    >>> reference_genome = "/path/to/reference_genome.fasta"
+    >>> read_file = "/path/to/read_1.fq"
+    >>> bwasw_cmd = BwaBwaswCommandline(reference=reference_genome, read_file=read_file)
+    >>> print(bwasw_cmd)
+    bwa bwasw /path/to/reference_genome.fasta /path/to/read_1.fq
+
+    You would typically run the command line using bwasw_cmd() or via the
+    Python subprocess module, as described in the Biopython tutorial.
+
+    """
+
+    def __init__(self, cmd="bwa", **kwargs):
+        """Initialize the class."""
+        self.program_name = cmd
+        self.parameters = [
+            _StaticArgument("bwasw"),
+            _Argument(
+                ["reference"], "Reference file name", filename=True, is_required=True
+            ),
+            _Argument(["read_file"], "Read file", filename=True, is_required=True),
+            _Argument(["mate_file"], "Mate file", filename=True, is_required=False),
+            _Option(
+                ["-a", "a"],
+                "Score of a match [1]",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Option(
+                ["-b", "b"],
+                "Mismatch penalty [3]",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Option(
+                ["-q", "q"],
+                "Gap open penalty [5]",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Option(
+                ["-r", "r"],
+                "Gap extension penalty. The penalty for a contiguous gap of size k is q+k*r. [2]",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Option(
+                ["-t", "t"],
+                "Number of threads in the multi-threading mode [1]",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Option(
+                ["-w", "w"],
+                "Band width in the banded alignment [33]",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Option(
+                ["-T", "T"],
+                "Minimum score threshold divided by a [37]",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Option(
+                ["-c", "c"],
+                """Coefficient for threshold adjustment according to query length [5.5].
+
+                    Given an l-long query, the threshold for a hit to be retained is
+                    a*max{T,c*log(l)}.""",
+                checker_function=lambda x: isinstance(x, float),
+                equate=False,
+            ),
+            _Option(
+                ["-z", "z"],
+                "Z-best heuristics. Higher -z increases accuracy at the cost of speed. [1]",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Option(
+                ["-s", "s"],
+                """Maximum SA interval size for initiating a seed [3].
+
+                    Higher -s increases accuracy at the cost of speed.""",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Option(
+                ["-N", "N"],
+                "Minimum number of seeds supporting the resultant alignment to skip reverse alignment. [5]",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+        ]
+        AbstractCommandline.__init__(self, cmd, **kwargs)
+
+
+class BwaMemCommandline(AbstractCommandline):
+    """Command line wrapper for Burrows Wheeler Aligner (BWA) mem.
+
+    Run a BWA-MEM alignment, with single- or paired-end reads, equivalent to::
+
+        $ bwa mem [...] <in.db.fasta> <in1.fq> [<in2.fq>] > <out.sam>
+
+    See http://bio-bwa.sourceforge.net/bwa.shtml for details.
+
+    Examples
+    --------
+    >>> from Bio.Sequencing.Applications import BwaMemCommandline
+    >>> reference_genome = "/path/to/reference_genome.fasta"
+    >>> read_file = "/path/to/read_1.fq"
+    >>> output_sam_file = "/path/to/output.sam"
+    >>> align_cmd = BwaMemCommandline(reference=reference_genome, read_file1=read_file)
+    >>> print(align_cmd)
+    bwa mem /path/to/reference_genome.fasta /path/to/read_1.fq
+
+    You would typically run the command line using align_cmd(stdout=output_sam_file)
+    or via the Python subprocess module, as described in the Biopython tutorial.
+
+    """
+
+    def __init__(self, cmd="bwa", **kwargs):
+        """Initialize the class."""
+        self.program_name = cmd
+        self.parameters = [
+            _StaticArgument("mem"),
+            _Argument(
+                ["reference"], "Reference file name", filename=True, is_required=True
+            ),
+            _Argument(
+                ["read_file1"], "Read 1 file name", filename=True, is_required=True
+            ),
+            _Argument(
+                ["read_file2"], "Read 2 file name", filename=True, is_required=False
+            ),
+            _Option(
+                ["-t", "t"],
+                "Number of threads [1]",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Option(
+                ["-k", "k"],
+                "Minimum seed length. Matches shorter than INT will be missed. The alignment speed is usually insensitive to this value unless it significantly deviates 20. [19]",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Option(
+                ["-w", "w"],
+                "Band width. Essentially, gaps longer than INT will not be found. Note that the maximum gap length is also affected by the scoring matrix and the hit length, not solely determined by this option. [100]",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Option(
+                ["-d", "d"],
+                r"Off-diagonal X-dropoff (Z-dropoff). Stop extension when the difference between the best and the current extension score is above \|i-j\|*A+INT, where i and j are the current positions of the query and reference, respectively, and A is the matching score. Z-dropoff is similar to BLAST's X-dropoff except that it doesn't penalize gaps in one of the sequences in the alignment. Z-dropoff not only avoids unnecessary extension, but also reduces poor alignments inside a long good alignment. [100]",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Option(
+                ["-r", "r"],
+                "Trigger re-seeding for a MEM longer than minSeedLen*FLOAT. This is a key heuristic parameter for tuning the performance. Larger value yields fewer seeds, which leads to faster alignment speed but lower accuracy. [1.5]",
+                checker_function=lambda x: isinstance(x, (int, float)),
+                equate=False,
+            ),
+            _Option(
+                ["-c", "c"],
+                "Discard a MEM if it has more than INT occurrence in the genome. This is an insensitive parameter. [10000]",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Option(
+                ["-A", "A"],
+                "Matching score. [1]",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Option(
+                ["-B", "B"],
+                "Mismatch penalty. The sequence error rate is approximately: {.75 * exp[-log(4) * B/A]}. [4]",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Option(
+                ["-O", "O"],
+                "Gap open penalty. [6]",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Option(
+                ["-E", "E"],
+                "Gap extension penalty. A gap of length k costs O + k*E (i.e. -O is for opening a zero-length gap). [1]",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Option(
+                ["-L", "L"],
+                "Clipping penalty. When performing SW extension, BWA-MEM keeps track of the best score reaching the end of query. If this score is larger than the best SW score minus the clipping penalty, clipping will not be applied. Note that in this case, the SAM AS tag reports the best SW score; clipping penalty is not deducted. [5]",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Option(
+                ["-U", "U"],
+                "Penalty for an unpaired read pair. BWA-MEM scores an unpaired read pair as scoreRead1+scoreRead2-INT and scores a paired as scoreRead1+scoreRead2-insertPenalty. It compares these two scores to determine whether we should force pairing. [9] ",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Option(
+                ["-R", "R"],
+                "Complete read group header line. 't' can be used in STR and will be converted to a TAB in the output SAM. The read group ID will be attached to every read in the output. An example is '@RG\tID:foo\tSM:bar'. [null]",
+                checker_function=lambda x: isinstance(x, str),
+                equate=False,
+            ),
+            _Option(
+                ["-T", "T"],
+                "Don't output alignment with score lower than INT. This option only affects output. [30]",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Option(
+                ["-v", "v"],
+                "Control the verbose level of the output. This option has not been fully supported throughout BWA. Ideally, a value 0 for disabling all the output to stderr; 1 for outputting errors only; 2 for warnings and errors; 3 for all normal messages; 4 or higher for debugging. When this option takes value 4, the output is not SAM. [3]",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Switch(
+                ["-P", "P"],
+                "In the paired-end mode, perform SW to rescue missing hits only but do not try to find hits that fit a proper pair.",
+            ),
+            _Switch(
+                ["-p", "p"],
+                "Assume the first input query file is interleaved paired-end FASTA/Q. See the command description for details.",
+            ),
+            _Switch(
+                ["-a", "a"],
+                "Output all found alignments for single-end or unpaired paired-end reads. These alignments will be flagged as secondary alignments.",
+            ),
+            _Switch(
+                ["-C", "C"],
+                "Append FASTA/Q comment to SAM output. This option can be used to transfer read meta information (e.g. barcode) to the SAM output. Note that the FASTA/Q comment (the string after a space in the header line) must conform the SAM spec (e.g. BC:Z:CGTAC). Malformated comments lead to incorrect SAM output.",
+            ),
+            _Switch(
+                ["-H", "H"],
+                "Use hard clipping 'H' in the SAM output. This option may dramatically reduce the redundancy of output when mapping long contig or BAC sequences.",
+            ),
+            _Switch(
+                ["-M", "M"],
+                "Mark shorter split hits as secondary (for Picard compatibility).",
+            ),
+        ]
+        AbstractCommandline.__init__(self, cmd, **kwargs)
+
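+# Sketch of a paired-end BWA-MEM run (assumes bwa is on PATH; the paths are
+# placeholders; the SAM output is redirected to a file via stdout):
+#
+#     mem_cmd = BwaMemCommandline(reference="ref.fasta",
+#                                 read_file1="r1.fq", read_file2="r2.fq")
+#     stdout, stderr = mem_cmd(stdout="aln.sam")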
+
+if __name__ == "__main__":
+    from Bio._utils import run_doctest
+
+    run_doctest()
diff --git a/code/lib/Bio/Sequencing/Applications/_samtools.py b/code/lib/Bio/Sequencing/Applications/_samtools.py
new file mode 100644
index 0000000..546a358
--- /dev/null
+++ b/code/lib/Bio/Sequencing/Applications/_samtools.py
@@ -0,0 +1,1035 @@
+# Copyright 2014 Saket Choudhary. All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Command line wrapper for samtools."""
+# Last Checked with samtools [0.1.20 and 1.2]
+# TODO samtools 1.x has additional options over 0.x which
+# are missing from this wrapper
+
+
+from Bio.Application import _Option, _Argument, _Switch
+from Bio.Application import AbstractCommandline, _ArgumentList
+from Bio.Application import _StaticArgument
+
+
+class SamtoolsViewCommandline(AbstractCommandline):
+    """Command line wrapper for samtools view.
+
+    Extract/print all or sub alignments in SAM or BAM format, equivalent to::
+
+        $ samtools view [-bchuHS] [-t in.refList] [-o output] [-f reqFlag]
+                        [-F skipFlag] [-q minMapQ] [-l library] [-r readGroup]
+                        [-R rgFile] <in.bam>|<in.sam> [region1 [...]]
+
+    See http://samtools.sourceforge.net/samtools.shtml for more details
+
+    Examples
+    --------
+    >>> from Bio.Sequencing.Applications import SamtoolsViewCommandline
+    >>> input_file = "/path/to/sam_or_bam_file"
+    >>> samtools_view_cmd = SamtoolsViewCommandline(input_file=input_file)
+    >>> print(samtools_view_cmd)
+    samtools view /path/to/sam_or_bam_file
+
+    """
+
+    def __init__(self, cmd="samtools", **kwargs):
+        """Initialize the class."""
+        self.program_name = cmd
+        self.parameters = [
+            _StaticArgument("view"),
+            _Switch(["-b", "b"], "Output in the BAM format"),
+            _Switch(
+                ["-c", "c"],
+                """Instead of printing the alignments, only count them and
+                    print the total number.
+
+                    All filter options, such as '-f', '-F' and '-q',
+                    are taken into account""",
+            ),
+            _Switch(["-h", "h"], "Include the header in the output"),
+            _Switch(
+                ["-u", "u"],
+                """Output uncompressed BAM.
+
+                    This option saves time spent on compression/decompression
+                    and is thus preferred when the output is piped to
+                    another samtools command""",
+            ),
+            _Switch(["-H", "H"], "Output the header only"),
+            _Switch(
+                ["-S", "S"],
+                """Input is in SAM.
+                    If @SQ header lines are absent,
+                    the '-t' option is required.""",
+            ),
+            _Option(
+                ["-t", "t"],
+                """This file is TAB-delimited.
+                    Each line must contain the reference name and the
+                    length of the reference, one line for each
+                    distinct reference; additional fields are ignored.
+
+                    This file also defines the order of the reference
+                    sequences in sorting.
+                    If you run 'samtools faidx <ref.fa>',
+                    the resultant index file <ref.fa>.fai can be used
+                    as this <in.ref_list> file.""",
+                filename=True,
+                equate=False,
+                checker_function=lambda x: isinstance(x, str),
+            ),
+            _Option(
+                ["-o", "o"],
+                "Output file",
+                filename=True,
+                equate=False,
+                checker_function=lambda x: isinstance(x, str),
+            ),
+            _Option(
+                ["-f", "f"],
+                """Only output alignments with all bits in
+                    INT present in the FLAG field""",
+                equate=False,
+                checker_function=lambda x: isinstance(x, int),
+            ),
+            _Option(
+                ["-F", "F"],
+                "Skip alignments with bits present in INT",
+                equate=False,
+                checker_function=lambda x: isinstance(x, int),
+            ),
+            _Option(
+                ["-q", "q"],
+                "Skip alignments with MAPQ smaller than INT",
+                equate=False,
+                checker_function=lambda x: isinstance(x, int),
+            ),
+            _Option(
+                ["-r", "r"],
+                "Only output reads in read group STR",
+                equate=False,
+                checker_function=lambda x: isinstance(x, str),
+            ),
+            _Option(
+                ["-R", "R"],
+                "Output reads in read groups listed in FILE",
+                filename=True,
+                equate=False,
+                checker_function=lambda x: isinstance(x, str),
+            ),
+            _Option(
+                ["-l", "l"],
+                "Only output reads in library STR",
+                equate=False,
+                checker_function=lambda x: isinstance(x, str),
+            ),
+            _Switch(
+                ["-1", "fast_bam"],
+                "Use zlib compression level 1 to compress the output",
+            ),
+            _Argument(
+                ["input", "input_file"],
+                "Input File Name",
+                filename=True,
+                is_required=True,
+            ),
+            _Argument(["region"], "Region", is_required=False),
+        ]
+        AbstractCommandline.__init__(self, cmd, **kwargs)
+
+
+class SamtoolsMpileupCommandline(AbstractCommandline):
+    """Command line wrapper for samtools mpileup.
+
+    Generate BCF or pileup for one or multiple BAM files, equivalent to::
+
+        $ samtools mpileup [-EBug] [-C capQcoef] [-r reg] [-f in.fa]
+                           [-l list] [-M capMapQ] [-Q minBaseQ]
+                           [-q minMapQ] in.bam [in2.bam [...]]
+
+    See http://samtools.sourceforge.net/samtools.shtml for more details
+
+    Examples
+    --------
+    >>> from Bio.Sequencing.Applications import SamtoolsMpileupCommandline
+    >>> input = ["/path/to/sam_or_bam_file"]
+    >>> samtools_mpileup_cmd = SamtoolsMpileupCommandline(input_file=input)
+    >>> print(samtools_mpileup_cmd)
+    samtools mpileup /path/to/sam_or_bam_file
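+
+    A further sketch adding a faidx-indexed reference and a minimum base
+    quality (the path and the cutoff are only placeholders):
+
+    >>> samtools_mpileup_cmd = SamtoolsMpileupCommandline(
+    ...     input_file=input, f="/path/to/reference.fasta", Q=13)
+    >>> print(samtools_mpileup_cmd)
+    samtools mpileup -f /path/to/reference.fasta -Q 13 /path/to/sam_or_bam_file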
+
+    """
+
+    def __init__(self, cmd="samtools", **kwargs):
+        """Initialize the class."""
+        self.program_name = cmd
+        self.parameters = [
+            _StaticArgument("mpileup"),
+            _Switch(
+                ["-E", "E"],
+                """Extended BAQ computation.
+                    This option helps sensitivity especially
+                    for MNPs, but may hurt specificity a little bit""",
+            ),
+            _Switch(
+                ["-B", "B"],
+                """Disable probabilistic realignment for the
+                    computation of base alignment quality (BAQ).
+
+                    BAQ is the Phred-scaled probability of a read base being
+                    misaligned.
+                    Applying this option greatly helps to reduce false SNPs
+                    caused by misalignments""",
+            ),
+            _Switch(
+                ["-g", "g"],
+                """Compute genotype likelihoods and output them in the
+                    binary call format (BCF)""",
+            ),
+            _Switch(
+                ["-u", "u"],
+                """Similar to -g except that the output is
+                    uncompressed BCF, which is preferred for piping""",
+            ),
+            _Option(
+                ["-C", "C"],
+                """Coefficient for downgrading mapping quality for
+                    reads containing excessive mismatches.
+
+                    Given a read with a phred-scaled probability q of
+                    being generated from the mapped position,
+                    the new mapping quality is about sqrt((INT-q)/INT)*INT.
+                    A zero value disables this functionality;
+                    if enabled, the recommended value for BWA is 50""",
+                equate=False,
+                checker_function=lambda x: isinstance(x, int),
+            ),
+            _Option(
+                ["-r", "r"],
+                "Only generate pileup in region STR",
+                equate=False,
+                checker_function=lambda x: isinstance(x, str),
+            ),
+            _Option(
+                ["-f", "f"],
+                """The faidx-indexed reference file in the FASTA format.
+
+                    The file can be optionally compressed by razip""",
+                filename=True,
+                equate=False,
+                checker_function=lambda x: isinstance(x, str),
+            ),
+            _Option(
+                ["-l", "l"],
+                """BED or position list file containing a list of regions
+                    or sites where pileup or BCF should be generated""",
+                filename=True,
+                equate=False,
+                checker_function=lambda x: isinstance(x, str),
+            ),
+            _Option(
+                ["-M", "M"],
+                "Cap Mapping Quality at M",
+                equate=False,
+                checker_function=lambda x: isinstance(x, int),
+            ),
+            _Option(
+                ["-q", "q"],
+                "Minimum mapping quality for an alignment to be used",
+                equate=False,
+                checker_function=lambda x: isinstance(x, int),
+            ),
+            _Option(
+                ["-Q", "Q"],
+                "Minimum base quality for a base to be considered",
+                equate=False,
+                checker_function=lambda x: isinstance(x, int),
+            ),
+            _Switch(
+                ["-6", "illumina_13"],
+                "Assume the quality is in the Illumina 1.3+ encoding",
+            ),
+            _Switch(
+                ["-A", "A"], "Do not skip anomalous read pairs in variant calling."
+            ),
+            _Option(
+                ["-b", "b"],
+                "List of input BAM files, one file per line",
+                filename=True,
+                equate=False,
+                checker_function=lambda x: isinstance(x, str),
+            ),
+            _Option(
+                ["-d", "d"],
+                "At a position, read maximally INT reads per input BAM",
+                equate=False,
+                checker_function=lambda x: isinstance(x, int),
+            ),
+            _Switch(["-D", "D"], "Output per-sample read depth"),
+            _Switch(
+                ["-S", "S"],
+                """Output per-sample Phred-scaled
+                                strand bias P-value""",
+            ),
+            _Option(
+                ["-e", "e"],
+                """Phred-scaled gap extension sequencing error probability.
+
+                    Reducing INT leads to longer indels""",
+                equate=False,
+                checker_function=lambda x: isinstance(x, int),
+            ),
+            _Option(
+                ["-h", "h"],
+                """Coefficient for modeling homopolymer errors.
+
+                    Given an l-long homopolymer run, the sequencing error
+                    of an indel of size s is modeled as INT*s/l""",
+                equate=False,
+                checker_function=lambda x: isinstance(x, int),
+            ),
+            _Switch(["-I", "I"], "Do not perform INDEL calling"),
+            _Option(
+                ["-L", "L"],
+                """Skip INDEL calling if the average per-sample
+                    depth is above INT""",
+                equate=False,
+                checker_function=lambda x: isinstance(x, int),
+            ),
+            _Option(
+                ["-o", "o"],
+                """Phred-scaled gap open sequencing error probability.
+
+                    Reducing INT leads to more indel calls.""",
+                equate=False,
+                checker_function=lambda x: isinstance(x, int),
+            ),
+            _Option(
+                ["-p", "p"],
+                """Comma delimited list of platforms (determined by @RG-PL)
+                    from which indel candidates are obtained.
+
+                    It is recommended to collect indel candidates from
+                    sequencing technologies that have low indel error rate
+                    such as ILLUMINA""",
+                equate=False,
+                checker_function=lambda x: isinstance(x, str),
+            ),
+            _ArgumentList(
+                ["input_file"],
+                "Input File for generating mpileup",
+                filename=True,
+                is_required=True,
+            ),
+        ]
+        AbstractCommandline.__init__(self, cmd, **kwargs)
+
+
+class SamtoolsReheaderCommandline(AbstractCommandline):
+    """Command line wrapper for samtools reheader.
+
+    Replace the header in in.bam with the header
+    in in.header.sam, equivalent to::
+
+    $ samtools reheader <in.header.sam> <in.bam>
+
+    See http://samtools.sourceforge.net/samtools.shtml for more details
+
+    Examples
+    --------
+    >>> from Bio.Sequencing.Applications import SamtoolsReheaderCommandline
+    >>> input_header = "/path/to/header_sam_file"
+    >>> input_bam = "/path/to/input_bam_file"
+    >>> reheader_cmd = SamtoolsReheaderCommandline(input_header=input_header,
+    ...                                            input_bam=input_bam)
+    >>> print(reheader_cmd)
+    samtools reheader /path/to/header_sam_file /path/to/input_bam_file
+
+    """
+
+    def __init__(self, cmd="samtools", **kwargs):
+        """Initialize the class."""
+        self.program_name = cmd
+        self.parameters = [
+            _StaticArgument("reheader"),
+            _Argument(
+                ["input_header", "header_sam", "sam_file"],
+                "Sam file with header",
+                filename=True,
+                is_required=True,
+            ),
+            _Argument(
+                ["input_bam", "input_file", "bam_file"],
+                "BAM file for writing header to",
+                filename=True,
+                is_required=True,
+            ),
+        ]
+        AbstractCommandline.__init__(self, cmd, **kwargs)
+
+
+class SamtoolsCatCommandline(AbstractCommandline):
+    """Command line wrapper for samtools cat.
+
+    Concatenate BAMs, equivalent to::
+
+        $ samtools cat [-h header.sam] [-o out.bam] <in1.bam> <in2.bam> [ ... ]
+
+    See http://samtools.sourceforge.net/samtools.shtml for more details
+
+    Examples
+    --------
+    >>> from Bio.Sequencing.Applications import SamtoolsCatCommandline
+    >>> input_bam1 = "/path/to/input_bam1"
+    >>> input_bam2 = "/path/to/input_bam2"
+    >>> input_bams = [input_bam1, input_bam2]
+    >>> samtools_cat_cmd = SamtoolsCatCommandline(input_bam=input_bams)
+    >>> print(samtools_cat_cmd)
+    samtools cat /path/to/input_bam1 /path/to/input_bam2
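+
+    Naming the concatenated output explicitly (the output path is only a
+    placeholder):
+
+    >>> samtools_cat_cmd = SamtoolsCatCommandline(input_bam=input_bams,
+    ...                                           o="/path/to/out.bam")
+    >>> print(samtools_cat_cmd)
+    samtools cat -o /path/to/out.bam /path/to/input_bam1 /path/to/input_bam2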
+
+    """
+
+    def __init__(self, cmd="samtools", **kwargs):
+        """Initialize the class."""
+        self.program_name = cmd
+        self.parameters = [
+            _StaticArgument("cat"),
+            _Option(
+                ["-h", "h"],
+                "Header SAM file",
+                filename=True,
+                equate=False,
+                checker_function=lambda x: isinstance(x, str),
+            ),
+            _Option(
+                ["-o", "o"],
+                "Output BAM file",
+                filename=True,
+                equate=False,
+                checker_function=lambda x: isinstance(x, str),
+            ),
+            _ArgumentList(
+                ["input", "input_bam", "bams"],
+                "Input BAM files",
+                filename=True,
+                is_required=True,
+            ),
+        ]
+        AbstractCommandline.__init__(self, cmd, **kwargs)
+
+
+class SamtoolsVersion0xSortCommandline(AbstractCommandline):
+    """Command line wrapper for samtools version 0.1.x sort.
+
+    Sort alignments by leftmost coordinates, equivalent to::
+
+    $ samtools sort [-no] [-m maxMem] <in.bam> <out.prefix>
+
+    See http://samtools.sourceforge.net/samtools.shtml for more details
+
+    Examples
+    --------
+    >>> from Bio.Sequencing.Applications import SamtoolsVersion0xSortCommandline
+    >>> input_bam = "/path/to/input_bam"
+    >>> out_prefix = "/path/to/out_prefix"
+    >>> samtools_sort_cmd = SamtoolsVersion0xSortCommandline(input=input_bam, out_prefix=out_prefix)
+    >>> print(samtools_sort_cmd)
+    samtools sort /path/to/input_bam /path/to/out_prefix
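+
+    Sorting by read name with an explicit memory cap (the value is only
+    illustrative):
+
+    >>> samtools_sort_cmd = SamtoolsVersion0xSortCommandline(
+    ...     input=input_bam, out_prefix=out_prefix, n=True, m=1000000000)
+    >>> print(samtools_sort_cmd)
+    samtools sort -n -m 1000000000 /path/to/input_bam /path/to/out_prefix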
+
+    """
+
+    def __init__(self, cmd="samtools", **kwargs):
+        """Initialize the class."""
+        self.program_name = cmd
+
+        # options for version samtools 0.0.19
+        self.parameters = [
+            _StaticArgument("sort"),
+            _Switch(
+                ["-o", "o"],
+                """Output the final alignment
+                                    to the standard output""",
+            ),
+            _Switch(
+                ["-n", "n"],
+                """Sort by read names rather
+                                    than by chromosomal coordinates""",
+            ),
+            _Option(
+                ["-m", "m"],
+                "Approximately the maximum required memory",
+                equate=False,
+                checker_function=lambda x: isinstance(x, int),
+            ),
+            _Argument(["input"], "Input BAM file", filename=True, is_required=True),
+            _Argument(["out_prefix"], "Output prefix", filename=True, is_required=True),
+        ]
+        AbstractCommandline.__init__(self, cmd, **kwargs)
+
+
+class SamtoolsVersion1xSortCommandline(AbstractCommandline):
+    """Command line wrapper for samtools version 1.3.x sort.
+
+    Sort alignments by leftmost coordinates, or by read name when -n is
+    used, equivalent to::
+
+    $ samtools sort [-n] [-T PREFIX] [-o file] [-I INT] [-m maxMem] <in.bam>
+
+    See http://samtools.sourceforge.net/samtools.shtml for more details
+
+    Examples
+    --------
+    >>> from Bio.Sequencing.Applications import SamtoolsVersion1xSortCommandline
+    >>> input_bam = "/path/to/input_bam"
+    >>> PREFIX = "/path/to/out_prefix"
+    >>> file_name = "/path/to/out_file"
+    >>> samtools_sort_cmd = SamtoolsVersion1xSortCommandline(input=input_bam, T=PREFIX, o=file_name)
+    >>> print(samtools_sort_cmd)
+    samtools sort -o /path/to/out_file -T /path/to/out_prefix /path/to/input_bam
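+
+    Requesting an explicit output format (here BAM):
+
+    >>> samtools_sort_cmd = SamtoolsVersion1xSortCommandline(
+    ...     input=input_bam, o=file_name, O="bam", T=PREFIX)
+    >>> print(samtools_sort_cmd)
+    samtools sort -o /path/to/out_file -O bam -T /path/to/out_prefix /path/to/input_bam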
+
+    """
+
+    def __init__(self, cmd="samtools", **kwargs):
+        """Initialize the class."""
+        self.program_name = cmd
+
+        # options for version samtools 1.3.1
+        self.parameters = [
+            _StaticArgument("sort"),
+            _Switch(
+                ["-n", "n"],
+                """Sort by read names rather
+                                    than by chromosomal coordinates""",
+            ),
+            _Option(
+                ["-o", "o"],
+                """(file) Write the final sorted output to FILE,
+                    rather than to standard output""",
+                equate=False,
+                checker_function=lambda x: isinstance(x, str),
+            ),
+            _Option(
+                ["-O", "O"],
+                """(FORMAT) Write the final output as sam, bam, or cram""",
+                equate=False,
+                checker_function=lambda x: isinstance(x, str),
+            ),
+            _Option(
+                ["-T", "T"],
+                """(PREFIX) Write temporary files to PREFIX.nnnn.bam, or if the specified PREFIX
+                    is an existing directory, to PREFIX/samtools.mmm.mmm.tmp.nnnn.bam,
+                    where mmm is unique to this invocation of the sort command""",
+                equate=False,
+                checker_function=lambda x: isinstance(x, str),
+            ),
+            _Option(
+                ["-I", "I"],
+                """(INT) Set the desired compression level for the final output file,
+                    ranging from 0 (uncompressed) or 1 (fastest but minimal compression)
+                    to 9 (best compression but slowest to write), similarly to gzip(1)'s compression level setting.""",
+                equate=False,
+                checker_function=lambda x: isinstance(x, str),
+            ),
+            _Option(
+                ["-m", "m"],
+                "Approximately the maximum required memory",
+                equate=False,
+                checker_function=lambda x: isinstance(x, int),
+            ),
+            _Argument(
+                ["input"], "Input SAM/BAM/CRAM file", filename=True, is_required=True
+            ),
+        ]
+        AbstractCommandline.__init__(self, cmd, **kwargs)
+
+
+class SamtoolsMergeCommandline(AbstractCommandline):
+    """Command line wrapper for samtools merge.
+
+    Merge multiple sorted alignments, equivalent to::
+
+        $ samtools merge [-nur1f] [-h inh.sam] [-R reg] <out.bam>
+                         <in1.bam> <in2.bam> [...]
+
+    See http://samtools.sourceforge.net/samtools.shtml for more details
+
+    Examples
+    --------
+    >>> from Bio.Sequencing.Applications import SamtoolsMergeCommandline
+    >>> out_bam = "/path/to/out_bam"
+    >>> in_bam = ["/path/to/input_bam1", "/path/to/input_bam2"]
+    >>> merge_cmd = SamtoolsMergeCommandline(out_bam=out_bam,
+    ...                                      input_bam=in_bam)
+    >>> print(merge_cmd)
+    samtools merge /path/to/out_bam /path/to/input_bam1 /path/to/input_bam2
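+
+    Merging name-sorted input into uncompressed output (switch names as
+    defined in this wrapper):
+
+    >>> merge_cmd = SamtoolsMergeCommandline(out_bam=out_bam,
+    ...                                      input_bam=in_bam, n=True, u=True)
+    >>> print(merge_cmd)
+    samtools merge -n -u /path/to/out_bam /path/to/input_bam1 /path/to/input_bam2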
+
+    """
+
+    def __init__(self, cmd="samtools", **kwargs):
+        """Initialize the class."""
+        self.program_name = cmd
+        self.parameters = [
+            _StaticArgument("merge"),
+            _Switch(
+                ["-n", "n"],
+                """The input alignments are sorted by read names
+                    rather than by chromosomal coordinates""",
+            ),
+            _Switch(
+                ["-r", "r"],
+                """Attach an RG tag to each alignment.
+                    The tag value is inferred from file names""",
+            ),
+            _Switch(["-u", "u"], "Uncompressed BAM output"),
+            _Switch(
+                ["-1", "fast_bam"],
+                """Use zlib compression level 1
+                                           to compress the output""",
+            ),
+            _Switch(
+                ["-f", "f"],
+                """Force overwriting the
+                                    output file if present""",
+            ),
+            _Option(
+                ["-h", "h"],
+                """Use the lines of FILE as '@'
+                                    headers to be copied to out.bam""",
+                filename=True,
+                equate=False,
+                checker_function=lambda x: isinstance(x, str),
+            ),
+            _Option(
+                ["-R", "R"],
+                "Merge files in the specified region indicated by STR",
+                equate=False,
+                checker_function=lambda x: isinstance(x, str),
+            ),
+            _Argument(
+                ["output_bam", "out_bam", "out", "output"],
+                "Output BAM file",
+                filename=True,
+                is_required=True,
+            ),
+            _ArgumentList(
+                ["input_bam", "in_bam", "input", "bam"],
+                "Input BAM",
+                filename=True,
+                is_required=True,
+            ),
+        ]
+        AbstractCommandline.__init__(self, cmd, **kwargs)
+
+
+class SamtoolsIndexCommandline(AbstractCommandline):
+    """Command line wrapper for samtools index.
+
+    Index sorted alignment for fast random access, equivalent to::
+
+    $ samtools index <aln.bam>
+
+    See http://samtools.sourceforge.net/samtools.shtml for more details
+
+    Examples
+    --------
+    >>> from Bio.Sequencing.Applications import SamtoolsIndexCommandline
+    >>> input = "/path/to/aln_bam"
+    >>> samtools_index_cmd = SamtoolsIndexCommandline(input_bam=input)
+    >>> print(samtools_index_cmd)
+    samtools index /path/to/aln_bam
+
+    """
+
+    def __init__(self, cmd="samtools", **kwargs):
+        """Initialize the class."""
+        self.program_name = cmd
+        self.parameters = [
+            _StaticArgument("index"),
+            _Argument(["input", "in_bam", "input_bam"], "BAM file to be indexed"),
+        ]
+        AbstractCommandline.__init__(self, cmd, **kwargs)
+
+
+class SamtoolsIdxstatsCommandline(AbstractCommandline):
+    """Command line wrapper for samtools idxstats.
+
+    Retrieve and print stats in the index file, equivalent to::
+
+    $ samtools idxstats <aln.bam>
+
+    See http://samtools.sourceforge.net/samtools.shtml for more details
+
+    Examples
+    --------
+    >>> from Bio.Sequencing.Applications import SamtoolsIdxstatsCommandline
+    >>> input = "/path/to/aln_bam"
+    >>> samtools_idxstats_cmd = SamtoolsIdxstatsCommandline(input_bam=input)
+    >>> print(samtools_idxstats_cmd)
+    samtools idxstats /path/to/aln_bam
+
+    """
+
+    def __init__(self, cmd="samtools", **kwargs):
+        """Initialize the class."""
+        self.program_name = cmd
+        self.parameters = [
+            _StaticArgument("idxstats"),
+            _Argument(["input", "in_bam", "input_bam"], "Indexed BAM file"),
+        ]
+        AbstractCommandline.__init__(self, cmd, **kwargs)
+
+
+class SamtoolsFaidxCommandline(AbstractCommandline):
+    """Command line wrapper for samtools faidx.
+
+    Index the reference sequence in FASTA format, equivalent to::
+
+    $ samtools faidx <ref.fasta> [region1 [...]]
+
+    See http://samtools.sourceforge.net/samtools.shtml for more details
+
+    Examples
+    --------
+    >>> from Bio.Sequencing.Applications import SamtoolsFaidxCommandline
+    >>> reference = "/path/to/reference.fasta"
+    >>> samtools_faidx_cmd = SamtoolsFaidxCommandline(reference=reference)
+    >>> print(samtools_faidx_cmd)
+    samtools faidx /path/to/reference.fasta
+
+    """
+
+    def __init__(self, cmd="samtools", **kwargs):
+        """Initialize the class."""
+        self.program_name = cmd
+        self.parameters = [
+            _StaticArgument("faidx"),
+            _Argument(
+                ["reference", "reference_fasta", "ref"],
+                "Reference FASTA to be indexed",
+                filename=True,
+                is_required=True,
+            ),
+        ]
+        AbstractCommandline.__init__(self, cmd, **kwargs)
+
+
+class SamtoolsFixmateCommandline(AbstractCommandline):
+    """Command line wrapper for samtools fixmate.
+
+    Fill in mate coordinates, ISIZE and mate related
+    flags from a name-sorted alignment, equivalent to::
+
+    $ samtools fixmate <in.nameSrt.bam> <out.bam>
+
+    See http://samtools.sourceforge.net/samtools.shtml for more details
+
+    Examples
+    --------
+    >>> from Bio.Sequencing.Applications import SamtoolsFixmateCommandline
+    >>> in_bam = "/path/to/in.nameSrt.bam"
+    >>> out_bam = "/path/to/out.bam"
+    >>> fixmate_cmd = SamtoolsFixmateCommandline(input_bam=in_bam,
+    ...                                          out_bam=out_bam)
+    >>> print(fixmate_cmd)
+    samtools fixmate /path/to/in.nameSrt.bam /path/to/out.bam
+
+    """
+
+    def __init__(self, cmd="samtools", **kwargs):
+        """Initialize the class."""
+        self.program_name = cmd
+        self.parameters = [
+            _StaticArgument("fixmate"),
+            _Argument(
+                ["in_bam", "sorted_bam", "input_bam", "input", "input_file"],
+                "Name Sorted Alignment File ",
+                filename=True,
+                is_required=True,
+            ),
+            _Argument(
+                ["out_bam", "output_bam", "output", "output_file"],
+                "Output file",
+                filename=True,
+                is_required=True,
+            ),
+        ]
+        AbstractCommandline.__init__(self, cmd, **kwargs)
+
+
+class SamtoolsRmdupCommandline(AbstractCommandline):
+    """Command line wrapper for samtools rmdup.
+
+    Remove potential PCR duplicates, equivalent to::
+
+    $ samtools rmdup [-sS] <input.srt.bam> <out.bam>
+
+    See http://samtools.sourceforge.net/samtools.shtml for more details
+
+    Examples
+    --------
+    >>> from Bio.Sequencing.Applications import SamtoolsRmdupCommandline
+    >>> input_sorted_bam = "/path/to/input.srt.bam"
+    >>> out_bam = "/path/to/out.bam"
+    >>> rmdup_cmd = SamtoolsRmdupCommandline(input_bam=input_sorted_bam,
+    ...                                      out_bam=out_bam)
+    >>> print(rmdup_cmd)
+    samtools rmdup /path/to/input.srt.bam /path/to/out.bam
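+
+    For single-end data, add the -s switch:
+
+    >>> rmdup_cmd = SamtoolsRmdupCommandline(input_bam=input_sorted_bam,
+    ...                                      out_bam=out_bam, s=True)
+    >>> print(rmdup_cmd)
+    samtools rmdup -s /path/to/input.srt.bam /path/to/out.bam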
+
+    """
+
+    def __init__(self, cmd="samtools", **kwargs):
+        """Initialize the class."""
+        self.program_name = cmd
+        self.parameters = [
+            _StaticArgument("rmdup"),
+            _Switch(
+                ["-s", "s"],
+                """Remove duplicates for single-end reads.
+
+                    By default, the command works for paired-end
+                    reads only""",
+            ),
+            _Switch(
+                ["-S", "S"],
+                """Treat paired-end reads
+                                    as single-end reads""",
+            ),
+            _Argument(
+                ["in_bam", "sorted_bam", "input_bam", "input", "input_file"],
+                "Coordinate-sorted alignment file",
+                filename=True,
+                is_required=True,
+            ),
+            _Argument(
+                ["out_bam", "output_bam", "output", "output_file"],
+                "Output file",
+                filename=True,
+                is_required=True,
+            ),
+        ]
+        AbstractCommandline.__init__(self, cmd, **kwargs)
+
+
+class SamtoolsCalmdCommandline(AbstractCommandline):
+    """Command line wrapper for samtools calmd.
+
+    Generate the MD tag, equivalent to::
+
+    $ samtools calmd [-EeubSr] [-C capQcoef] <aln.bam> <ref.fasta>
+
+    See http://samtools.sourceforge.net/samtools.shtml for more details
+
+    Examples
+    --------
+    >>> from Bio.Sequencing.Applications import SamtoolsCalmdCommandline
+    >>> input_bam = "/path/to/aln.bam"
+    >>> reference_fasta = "/path/to/reference.fasta"
+    >>> calmd_cmd = SamtoolsCalmdCommandline(input_bam=input_bam,
+    ...                                      reference=reference_fasta)
+    >>> print(calmd_cmd)
+    samtools calmd /path/to/aln.bam /path/to/reference.fasta
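+
+    With uncompressed output and a mapping-quality cap (the coefficient
+    50 is only illustrative):
+
+    >>> calmd_cmd = SamtoolsCalmdCommandline(input_bam=input_bam,
+    ...                                      reference=reference_fasta,
+    ...                                      u=True, C=50)
+    >>> print(calmd_cmd)
+    samtools calmd -u -C 50 /path/to/aln.bam /path/to/reference.fasta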
+
+    """
+
+    def __init__(self, cmd="samtools", **kwargs):
+        """Initialize the class."""
+        self.program_name = cmd
+        self.parameters = [
+            _StaticArgument("calmd"),
+            _Switch(
+                ["-E", "E"],
+                """Extended BAQ calculation.
+                    This option trades specificity for sensitivity,
+                    though the effect is minor.""",
+            ),
+            _Switch(
+                ["-e", "e"],
+                """Convert the read base to = if it is
+                    identical to the aligned reference base.
+
+                    Indel caller does not support the = bases
+                    at the moment.""",
+            ),
+            _Switch(["-u", "u"], "Output uncompressed BAM"),
+            _Switch(["-b", "b"], "Output compressed BAM "),
+            _Switch(["-S", "S"], "The input is SAM with header lines "),
+            _Switch(
+                ["-r", "r"],
+                """Compute the BQ tag (without -A)
+                    or cap base quality by BAQ (with -A).""",
+            ),
+            _Switch(
+                ["-A", "A"],
+                """When used jointly with -r this option overwrites
+                    the original base quality""",
+            ),
+            _Option(
+                ["-C", "C"],
+                """Coefficient to cap mapping quality
+                    of poorly mapped reads.
+
+                    See the pileup command for details.""",
+                equate=False,
+                checker_function=lambda x: isinstance(x, int),
+            ),
+            _Argument(
+                ["input", "input_file", "in_bam", "infile", "input_bam"],
+                "Input BAM",
+                filename=True,
+                is_required=True,
+            ),
+            _Argument(
+                ["reference", "reference_fasta", "ref"],
+                "faidx-indexed reference FASTA file",
+                filename=True,
+                is_required=True,
+            ),
+        ]
+        AbstractCommandline.__init__(self, cmd, **kwargs)
+
+
+class SamtoolsTargetcutCommandline(AbstractCommandline):
+    """Command line wrapper for samtools targetcut.
+
+    This command identifies target regions by examining the continuity
+    of read depth, computes haploid consensus sequences of targets
+    and outputs a SAM with each sequence corresponding to a target,
+    equivalent to::
+
+        $ samtools targetcut [-Q minBaseQ] [-i inPenalty] [-0 em0]
+                             [-1 em1] [-2 em2] [-f ref] <name-srt.bam>
+
+    See http://samtools.sourceforge.net/samtools.shtml for more details
+
+    Examples
+    --------
+    >>> from Bio.Sequencing.Applications import SamtoolsTargetcutCommandline
+    >>> input_bam = "/path/to/aln.bam"
+    >>> samtools_targetcut_cmd = SamtoolsTargetcutCommandline(input_bam=input_bam)
+    >>> print(samtools_targetcut_cmd)
+    samtools targetcut /path/to/aln.bam
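+
+    Supplying a reference and a minimum base quality (the path and the
+    value are only placeholders):
+
+    >>> samtools_targetcut_cmd = SamtoolsTargetcutCommandline(
+    ...     input_bam=input_bam, Q=13, f="/path/to/reference.fasta")
+    >>> print(samtools_targetcut_cmd)
+    samtools targetcut -Q 13 -f /path/to/reference.fasta /path/to/aln.bam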
+
+    """
+
+    def __init__(self, cmd="samtools", **kwargs):
+        """Initialize the class."""
+        self.program_name = cmd
+        self.parameters = [
+            _StaticArgument("targetcut"),
+            _Option(
+                ["-Q", "Q"],
+                "Minimum Base Quality ",
+                equate=False,
+                checker_function=lambda x: isinstance(x, int),
+            ),
+            _Option(
+                ["-i", "i"],
+                "Insertion Penalty",
+                equate=False,
+                checker_function=lambda x: isinstance(x, int),
+            ),
+            _Option(
+                ["-f", "f"],
+                "Reference Filename",
+                filename=True,
+                equate=False,
+                checker_function=lambda x: isinstance(x, str),
+            ),
+            _Option(
+                ["-0", "em0"],
+                "em0",
+                equate=False,
+                checker_function=lambda x: isinstance(x, str),
+            ),
+            _Option(
+                ["-1", "em1"],
+                "em1",
+                equate=False,
+                checker_function=lambda x: isinstance(x, str),
+            ),
+            _Option(
+                ["-2", "em2"],
+                "em2",
+                equate=False,
+                checker_function=lambda x: isinstance(x, str),
+            ),
+            _Argument(
+                ["input", "input_bam", "in_bam"],
+                "Input file",
+                filename=True,
+                is_required=True,
+            ),
+        ]
+        AbstractCommandline.__init__(self, cmd, **kwargs)
+
+
+class SamtoolsPhaseCommandline(AbstractCommandline):
+    """Command line wrapper for samtools phase.
+
+    Call and phase heterozygous SNPs, equivalent to::
+
+        $ samtools phase [-AF] [-k len] [-b prefix]
+                         [-q minLOD] [-Q minBaseQ] <in.bam>
+
+    See http://samtools.sourceforge.net/samtools.shtml for more details
+
+    Examples
+    --------
+    >>> from Bio.Sequencing.Applications import SamtoolsPhaseCommandline
+    >>> input_bam = "/path/to/in.bam"
+    >>> samtools_phase_cmd = SamtoolsPhaseCommandline(input_bam=input_bam)
+    >>> print(samtools_phase_cmd)
+    samtools phase /path/to/in.bam
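+
+    Note that this wrapper lists the input file before the options, so
+    the generated command places it first (values are only illustrative):
+
+    >>> samtools_phase_cmd = SamtoolsPhaseCommandline(input_bam=input_bam,
+    ...                                               k=13, Q=13)
+    >>> print(samtools_phase_cmd)
+    samtools phase /path/to/in.bam -k 13 -Q 13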
+
+    """
+
+    def __init__(self, cmd="samtools", **kwargs):
+        """Initialize the class."""
+        self.program_name = cmd
+        self.parameters = [
+            _StaticArgument("phase"),
+            _Argument(
+                ["input", "input_bam", "in_bam"],
+                "Input file",
+                filename=True,
+                is_required=True,
+            ),
+            _Switch(["-A", "A"], "Drop reads with ambiguous phase"),
+            _Option(
+                ["-b", "b"],
+                "Prefix of BAM output",
+                filename=True,
+                equate=False,
+                checker_function=lambda x: isinstance(x, str),
+            ),
+            _Switch(["-F", "F"], "Do not attempt to fix chimeric reads"),
+            _Option(
+                ["-k", "k"],
+                "Maximum length for local phasing",
+                equate=False,
+                checker_function=lambda x: isinstance(x, int),
+            ),
+            _Option(
+                ["-q", "q"],
+                """Minimum Phred-scaled LOD to
+                    call a heterozygote""",
+                equate=False,
+                checker_function=lambda x: isinstance(x, int),
+            ),
+            _Option(
+                ["-Q", "Q"],
+                """Minimum base quality to be
+                    used in het calling""",
+                equate=False,
+                checker_function=lambda x: isinstance(x, int),
+            ),
+        ]
+        AbstractCommandline.__init__(self, cmd, **kwargs)
+
+
+if __name__ == "__main__":
+    from Bio._utils import run_doctest
+
+    run_doctest()
diff --git a/code/lib/Bio/Sequencing/Phd.py b/code/lib/Bio/Sequencing/Phd.py
new file mode 100644
index 0000000..6e7cc8d
--- /dev/null
+++ b/code/lib/Bio/Sequencing/Phd.py
@@ -0,0 +1,199 @@
+# Copyright 2004 by Cymon J. Cox and Frank Kauff.  All rights reserved.
+# Copyright 2008 by Michiel de Hoon.  All rights reserved.
+# Revisions copyright 2009 by Cymon J. Cox.  All rights reserved.
+# Revisions copyright 2009 by Peter Cock.  All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Parser for PHD files output by PHRED and used by PHRAP and CONSED.
+
+This module can be used directly, which will return Record objects
+containing all the original data in the file.
+
+Alternatively, using Bio.SeqIO with the "phd" format will call this module
+internally.  This will give SeqRecord objects for each contig sequence.
+"""
+
+from Bio import Seq
+
+
+CKEYWORDS = [
+    "CHROMAT_FILE",
+    "ABI_THUMBPRINT",
+    "PHRED_VERSION",
+    "CALL_METHOD",
+    "QUALITY_LEVELS",
+    "TIME",
+    "TRACE_ARRAY_MIN_INDEX",
+    "TRACE_ARRAY_MAX_INDEX",
+    "TRIM",
+    "TRACE_PEAK_AREA_RATIO",
+    "CHEM",
+    "DYE",
+]
+
+
+class Record:
+    """Hold information from a PHD file."""
+
+    def __init__(self):
+        """Initialize the class."""
+        self.file_name = ""
+        self.comments = {}
+        for kw in CKEYWORDS:
+            self.comments[kw.lower()] = None
+        self.sites = []
+        self.seq = ""
+        self.seq_trimmed = ""
+
+
+def read(source):
+    """Read one PHD record from the file and return it as a Record object.
+
+    Argument source is a file-like object opened in text mode, or a path
+    to a file.
+
+    This function reads PHD file data line by line from the source, and
+    returns a single Record object. A ValueError is raised if more than
+    one record is found in the file.
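+
+    Typical usage (the file name is a placeholder)::
+
+        record = read("my_file.phd")
+        print(record.file_name)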
+    """
+    handle = _open(source)
+    try:
+        record = _read(handle)
+        try:
+            next(handle)
+        except StopIteration:
+            return record
+        else:
+            raise ValueError("More than one PHD record found")
+    finally:
+        if handle is not source:
+            handle.close()
+
+
+def parse(source):
+    """Iterate over a file yielding multiple PHD records.
+
+    Argument source is a file-like object opened in text mode, or a path
+    to a file.
+
+    The data is read line by line from the source.
+
+    Typical usage::
+
+        records = parse(handle)
+        for record in records:
+            # do something with the record object
+
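+    For example, to print each record's name and sequence (the file name
+    is a placeholder)::
+
+        with open("my_file.phd") as handle:
+            for record in parse(handle):
+                print(record.file_name, record.seq)
+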
+    """
+    handle = _open(source)
+    try:
+        while True:
+            record = _read(handle)
+            if not record:
+                return
+            yield record
+    finally:
+        if handle is not source:
+            handle.close()
+
+
+# Everything below is considered private
+
+
+def _open(source):
+    # Accept either a path (opened here in text mode) or an existing
+    # handle; open() raises TypeError when given a file-like object.
+    try:
+        handle = open(source)
+    except TypeError:
+        handle = source
+        # On a binary-mode handle read(0) returns b"" rather than "",
+        # so this detects the wrong mode without consuming any data.
+        if handle.read(0) != "":
+            raise ValueError("PHD files must be opened in text mode.") from None
+    return handle
+
+
+def _read(handle):
+    for line in handle:
+        if line.startswith("BEGIN_SEQUENCE"):
+            record = Record()
+            record.file_name = line[15:].rstrip()
+            break
+    else:
+        return  # No record found
+
+    for line in handle:
+        if line.startswith("BEGIN_COMMENT"):
+            break
+    else:
+        raise ValueError("Failed to find BEGIN_COMMENT line")
+
+    for line in handle:
+        line = line.strip()
+        if not line:
+            continue
+        if line == "END_COMMENT":
+            break
+        keyword, value = line.split(":", 1)
+        keyword = keyword.lower()
+        value = value.strip()
+        if keyword in (
+            "chromat_file",
+            "phred_version",
+            "call_method",
+            "chem",
+            "dye",
+            "time",
+            "basecaller_version",
+            "trace_processor_version",
+        ):
+            record.comments[keyword] = value
+        elif keyword in (
+            "abi_thumbprint",
+            "quality_levels",
+            "trace_array_min_index",
+            "trace_array_max_index",
+        ):
+            record.comments[keyword] = int(value)
+        elif keyword == "trace_peak_area_ratio":
+            record.comments[keyword] = float(value)
+        elif keyword == "trim":
+            first, last, prob = value.split()
+            record.comments[keyword] = (int(first), int(last), float(prob))
+    else:
+        raise ValueError("Failed to find END_COMMENT line")
+
+    for line in handle:
+        if line.startswith("BEGIN_DNA"):
+            break
+    else:
+        raise ValueError("Failed to find BEGIN_DNA line")
+
+    for line in handle:
+        if line.startswith("END_DNA"):
+            break
+        else:
+            # Line is: "site quality peak_location"
+            # Peak location is optional according to
+            # David Gordon (the Consed author)
+            parts = line.split()
+            if len(parts) in [2, 3]:
+                record.sites.append(tuple(parts))
+            else:
+                raise ValueError(
+                    "DNA line must contain a base and quality "
+                    "score, and optionally a peak location."
+                )
+
+    for line in handle:
+        if line.startswith("END_SEQUENCE"):
+            break
+    else:
+        raise ValueError("Failed to find END_SEQUENCE line")
+
+    record.seq = Seq.Seq("".join(n[0] for n in record.sites))
+    if record.comments["trim"] is not None:
+        first, last = record.comments["trim"][:2]
+        record.seq_trimmed = record.seq[first:last]
+
+    return record
diff --git a/code/lib/Bio/Sequencing/__init__.py b/code/lib/Bio/Sequencing/__init__.py
new file mode 100644
index 0000000..927b866
--- /dev/null
+++ b/code/lib/Bio/Sequencing/__init__.py
@@ -0,0 +1,12 @@
+# Copyright 2004 Frank Kauff. All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Code to deal with various programs for sequencing and assembly.
+
+This code deals with programs such as Phred, Phrap and Consed -- which provide
+utilities for calling bases from sequencing reads, and assembling sequences
+into contigs.
+"""
diff --git a/code/lib/Bio/Sequencing/__pycache__/Ace.cpython-37.pyc b/code/lib/Bio/Sequencing/__pycache__/Ace.cpython-37.pyc
new file mode 100644
index 0000000..98990a6
Binary files /dev/null and b/code/lib/Bio/Sequencing/__pycache__/Ace.cpython-37.pyc differ
diff --git a/code/lib/Bio/Sequencing/__pycache__/Phd.cpython-37.pyc b/code/lib/Bio/Sequencing/__pycache__/Phd.cpython-37.pyc
new file mode 100644
index 0000000..89f4d63
Binary files /dev/null and b/code/lib/Bio/Sequencing/__pycache__/Phd.cpython-37.pyc differ
diff --git a/code/lib/Bio/Sequencing/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/Sequencing/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000..912e97a
Binary files /dev/null and b/code/lib/Bio/Sequencing/__pycache__/__init__.cpython-37.pyc differ
diff --git a/code/lib/Bio/SubsMat/FreqTable.py b/code/lib/Bio/SubsMat/FreqTable.py
new file mode 100644
index 0000000..f0af932
--- /dev/null
+++ b/code/lib/Bio/SubsMat/FreqTable.py
@@ -0,0 +1,107 @@
+# Copyright 2000 by Iddo Friedberg idoerg@cc.huji.ac.il
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+
+r"""A class to handle frequency tables or letter count files.
+
+Example files for a DNA alphabet:
+
+A count file (whitespace separated)::
+
+ A  50
+ C  37
+ G  23
+ T  58
+
+The same info as a frequency file::
+
+ A 0.2976
+ C 0.2202
+ G 0.1369
+ T 0.3452
+
+Functions:
+  :read_count(f): read a count file from stream f. Then convert to
+                  frequencies.
+  :read_freq(f): read a frequency data file from stream f. Of course, we then
+                 don't have the counts, but it is usually the letter frequencies
+                 which are interesting.
+
+Methods:
+  (all internal)
+
+Attributes:
+  :alphabet: The letters you are using as indices into the table.
+  :data: Frequency dictionary.
+  :count: Count dictionary. Empty if no counts are provided.
+
+Example of use:
+    >>> import io
+    >>> from Bio.SubsMat import FreqTable
+    >>> f_count = io.StringIO(u"A  50\nC  37\nG  23\nT  58")
+    >>> ftab = FreqTable.read_count(f_count)
+    >>> for nb in sorted(ftab):
+    ...     print("%s %0.4f" %(nb, ftab[nb]))
+    ...
+    A 0.2976
+    C 0.2202
+    G 0.1369
+    T 0.3452
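+
+    A frequency file can be loaded the same way with read_freq (the
+    values here mirror the count example above):
+
+    >>> f_freq = io.StringIO(u"A 0.2976\nC 0.2202\nG 0.1369\nT 0.3452")
+    >>> ftab = FreqTable.read_freq(f_freq)
+    >>> print("%0.4f" % ftab["A"])
+    0.2976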
+
+"""
+
+
+COUNT = 1
+FREQ = 2
+
+
+class FreqTable(dict):
+    """Define class to handle frequency tables or letter count files."""
+
+    def _freq_from_count(self):
+        """Calculate frequency from count values (PRIVATE)."""
+        total = float(sum(self.count.values()))
+        for i, v in self.count.items():
+            self[i] = v / total
+
+    def _alphabet_from_input(self):
+        """Order the alphabet (PRIVATE)."""
+        return "".join(sorted(self))
+
+    def __init__(self, in_dict, dict_type, alphabet=None):
+        """Initialize the class."""
+        self.alphabet = alphabet
+        if dict_type == COUNT:
+            self.count = in_dict
+            self._freq_from_count()
+        elif dict_type == FREQ:
+            self.count = {}
+            self.update(in_dict)
+        else:
+            raise ValueError("bad dict_type")
+        if not alphabet:
+            self.alphabet = self._alphabet_from_input()
+
+
+def read_count(f):
+    """Read a count file f and load values to the Frequency Table."""
+    count = {}
+    for line in f:
+        key, value = line.strip().split()
+        count[key] = int(value)
+    return FreqTable(count, COUNT)
+
+
+def read_freq(f):
+    """Read a frequency data file f and load values to the Frequency Table."""
+    freq_dict = {}
+    for line in f:
+        key, value = line.strip().split()
+        freq_dict[key] = float(value)
+    return FreqTable(freq_dict, FREQ)
diff --git a/code/lib/Bio/SubsMat/MatrixInfo.py b/code/lib/Bio/SubsMat/MatrixInfo.py
new file mode 100644
index 0000000..fc86cb0
--- /dev/null
+++ b/code/lib/Bio/SubsMat/MatrixInfo.py
@@ -0,0 +1,2724 @@
+# Copyright 2000 by Iddo Friedberg
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+
+"""Substitution matrices for use in alignments, etc.
+
+The information on this page was originally obtained from Dr. Gerhard Vogt's
+page http://www.embl-heidelberg.de/~vogt/matrices/mlist1.html (dead link),
+and was extracted using a script.
+
+You can view an archive copy of this webpage from 1999 here:
+https://web.archive.org/web/19991014010917/http://www.embl-heidelberg.de/%7Evogt/matrices/mlist1.html
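+
+Each matrix is a plain dict keyed by tuples of amino-acid one-letter
+codes; each pair is stored only once, so reverse the tuple if a lookup
+fails. A minimal illustration using the benner6 matrix defined below::
+
+    >>> from Bio.SubsMat.MatrixInfo import benner6
+    >>> benner6[("W", "F")]
+    -1.6
+    >>> pair = ("F", "W")
+    >>> pair if pair in benner6 else (pair[1], pair[0])
+    ('W', 'F')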
+"""
+
+# The data rich dictionaries do not lend themselves to black style, turn it off:
+# fmt: off
+
+# a list of all available substitution matrices
+available_matrices = ["benner6", "benner22", "benner74", "blosum100",
+                      "blosum30", "blosum35", "blosum40", "blosum45",
+                      "blosum50", "blosum55", "blosum60", "blosum62",
+                      "blosum65", "blosum70", "blosum75", "blosum80",
+                      "blosum85", "blosum90", "blosum95", "feng",
+                      "fitch", "genetic", "gonnet", "grant",
+                      "ident", "johnson", "levin", "mclach",
+                      "miyata", "nwsgappep", "pam120", "pam180",
+                      "pam250", "pam30", "pam300", "pam60",
+                      "pam90", "rao", "risler", "structure",
+                      ]
+
+# http://www.embl-heidelberg.de/~vogt/matrices/benner6.cmp
+benner6 = {
+    ("W", "F"): -1.6, ("L", "R"): -3.2, ("I", "I"): 4.4, ("Q", "Q"): 5.3,
+    ("W", "N"): -4.4, ("V", "I"): 3.9, ("H", "T"): -1.7, ("H", "P"): -0.4,
+    ("W", "V"): -4.8, ("Q", "E"): 2.1, ("W", "R"): 2.0, ("Q", "A"): -1.7,
+    ("H", "H"): 6.1, ("H", "D"): 0.1, ("L", "N"): -3.4, ("Y", "M"): -3.6,
+    ("Y", "I"): -3.3, ("Y", "E"): -4.1, ("E", "S"): -1.2, ("Y", "A"): -4.0,
+    ("Y", "Y"): 9.5, ("T", "C"): -1.5, ("E", "C"): -4.7, ("Y", "Q"): -1.4,
+    ("E", "G"): 0.5, ("V", "A"): 0.7, ("C", "C"): 12.1, ("M", "R"): -3.0,
+    ("P", "T"): 0.6, ("V", "E"): -3.0, ("P", "P"): 6.5, ("I", "T"): 0.7,
+    ("K", "S"): -1.2, ("R", "G"): -0.1, ("I", "P"): -2.0, ("R", "C"): -0.4,
+    ("A", "T"): 1.7, ("K", "K"): 5.6, ("A", "P"): 1.1, ("V", "M"): 3.3,
+    ("I", "D"): -4.2, ("K", "C"): -2.8, ("K", "G"): -1.4, ("R", "S"): -0.9,
+    ("F", "Q"): -4.4, ("F", "A"): -3.2, ("V", "V"): 4.0, ("M", "N"): -2.5,
+    ("F", "E"): -6.7, ("D", "N"): 2.5, ("F", "I"): 0.0, ("F", "M"): -0.1,
+    ("M", "S"): -1.3, ("S", "S"): 2.1, ("L", "Q"): -2.4, ("W", "E"): -5.6,
+    ("W", "A"): -4.3, ("W", "M"): -4.4, ("H", "S"): -0.9, ("W", "I"): -5.0,
+    ("S", "C"): 0.9, ("L", "A"): -1.3, ("L", "E"): -5.0, ("W", "Q"): -2.6,
+    ("H", "G"): -2.1, ("Q", "N"): 0.1, ("H", "C"): -1.2, ("L", "M"): -2.9,
+    ("W", "Y"): -0.3, ("Y", "N"): -0.9, ("E", "P"): -2.6, ("Y", "F"): 5.6,
+    ("E", "T"): -1.6, ("A", "A"): 2.5, ("I", "N"): -2.5, ("G", "A"): 0.8,
+    ("Y", "V"): -3.8, ("E", "D"): 4.4, ("W", "H"): -2.8, ("Y", "R"): -2.6,
+    ("M", "Q"): -3.1, ("P", "S"): 1.4, ("R", "H"): 1.8, ("A", "C"): -1.7,
+    ("R", "D"): -1.5, ("K", "P"): -2.3, ("L", "D"): -5.3, ("K", "T"): -1.1,
+    ("V", "N"): -2.4, ("M", "A"): -0.2, ("K", "H"): 0.9, ("V", "R"): -3.7,
+    ("P", "C"): -2.7, ("M", "E"): -4.1, ("A", "S"): 1.4, ("T", "T"): 2.4,
+    ("R", "T"): -1.3, ("I", "G"): -3.4, ("R", "P"): -1.3, ("K", "D"): -0.2,
+    ("I", "C"): -3.6, ("F", "R"): -4.9, ("F", "V"): -0.5, ("L", "C"): -3.8,
+    ("F", "F"): 8.3, ("D", "A"): -0.6, ("F", "N"): -3.5, ("W", "D"): -6.3,
+    ("L", "P"): -0.2, ("Q", "S"): -1.4, ("N", "C"): -1.6, ("N", "G"): -0.1,
+    ("H", "N"): 1.4, ("W", "T"): -2.6, ("Q", "G"): -1.6, ("W", "P"): -4.8,
+    ("Q", "C"): -3.2, ("N", "S"): 1.2, ("L", "H"): -2.2, ("L", "L"): 4.8,
+    ("G", "T"): -0.5, ("M", "M"): 4.8, ("G", "P"): -1.7, ("Y", "K"): -4.0,
+    ("Y", "G"): -4.9, ("Y", "C"): 2.6, ("E", "A"): -0.7, ("E", "E"): 5.2,
+    ("Y", "S"): -1.8, ("M", "P"): -1.8, ("V", "C"): -3.1, ("M", "T"): 0.6,
+    ("V", "G"): -2.3, ("R", "E"): -0.4, ("V", "K"): -3.8, ("K", "Q"): 2.5,
+    ("R", "A"): -1.7, ("I", "R"): -3.8, ("N", "A"): 0.0, ("V", "S"): -0.9,
+    ("M", "D"): -4.3, ("M", "H"): -3.4, ("K", "A"): -1.9, ("R", "Q"): 2.5,
+    ("K", "E"): 0.9, ("F", "S"): -1.8, ("I", "K"): -3.8, ("D", "P"): -2.8,
+    ("D", "T"): -1.2, ("I", "M"): 4.0, ("F", "C"): -0.1, ("W", "L"): -3.0,
+    ("F", "G"): -5.7, ("F", "K"): -6.3, ("F", "T"): -2.4, ("D", "D"): 5.2,
+    ("Q", "T"): -1.7, ("W", "G"): -1.7, ("Q", "P"): 0.1, ("W", "C"): 1.6,
+    ("W", "K"): -1.4, ("H", "Q"): 3.2, ("Q", "D"): 0.6, ("W", "W"): 14.7,
+    ("V", "L"): 1.9, ("L", "G"): -4.6, ("W", "S"): -2.9, ("L", "K"): -4.1,
+    ("N", "P"): -1.1, ("H", "E"): -0.2, ("N", "T"): 0.5, ("H", "A"): -2.1,
+    ("Y", "L"): -1.6, ("Y", "H"): 4.4, ("G", "S"): 0.8, ("Y", "D"): -2.3,
+    ("V", "Q"): -3.5, ("L", "T"): -0.4, ("G", "G"): 5.8, ("G", "C"): -1.3,
+    ("E", "N"): 1.1, ("Y", "T"): -3.4, ("Y", "P"): -3.8, ("R", "N"): -0.1,
+    ("V", "D"): -3.3, ("K", "R"): 4.3, ("V", "H"): -3.8, ("I", "Q"): -3.8,
+    ("V", "P"): -1.6, ("M", "C"): -3.7, ("K", "N"): 1.0, ("V", "T"): 0.6,
+    ("M", "G"): -3.7, ("T", "S"): 1.5, ("I", "E"): -4.1, ("M", "K"): -2.9,
+    ("I", "A"): 0.1, ("N", "N"): 3.6, ("R", "R"): 5.1, ("F", "P"): -3.2,
+    ("L", "I"): 2.4, ("I", "S"): -1.2, ("D", "S"): -0.4, ("L", "S"): -1.5,
+    ("I", "H"): -3.7, ("F", "D"): -5.7, ("D", "C"): -3.7, ("F", "H"): 0.1,
+    ("D", "G"): 0.8, ("F", "L"): 2.4
+}
+
+
+# http://www.embl-heidelberg.de/~vogt/matrices/benner22.cmp
+benner22 = {
+    ("W", "F"): 0.5, ("L", "R"): -2.9, ("I", "I"): 4.2, ("Q", "Q"): 4.2,
+    ("W", "N"): -5.2, ("V", "I"): 3.6, ("H", "T"): -1.1, ("H", "P"): -0.4,
+    ("W", "V"): -4.5, ("Q", "E"): 1.7, ("W", "R"): -1.1, ("Q", "A"): -0.9,
+    ("H", "H"): 6.1, ("H", "D"): 0.3, ("L", "N"): -3.5, ("Y", "M"): -1.8,
+    ("Y", "I"): -2.2, ("Y", "E"): -4.0, ("E", "S"): -0.5, ("Y", "A"): -3.5,
+    ("Y", "Y"): 9.0, ("T", "C"): -1.1, ("E", "C"): -4.3, ("Y", "Q"): -1.9,
+    ("E", "G"): 0.5, ("V", "A"): 0.4, ("C", "C"): 12.6, ("M", "R"): -2.1,
+    ("P", "T"): 0.4, ("V", "E"): -2.7, ("P", "P"): 7.0, ("I", "T"): 0.3,
+    ("K", "S"): -0.4, ("R", "G"): -0.7, ("I", "P"): -2.3, ("R", "C"): -1.6,
+    ("A", "T"): 1.4, ("K", "K"): 4.4, ("A", "P"): 0.8, ("V", "M"): 2.5,
+    ("I", "D"): -4.0, ("K", "C"): -3.3, ("K", "G"): -1.0, ("R", "S"): -0.5,
+    ("F", "Q"): -3.6, ("F", "A"): -3.1, ("V", "V"): 3.7, ("M", "N"): -2.6,
+    ("F", "E"): -5.7, ("D", "N"): 2.4, ("F", "I"): 0.5, ("F", "M"): 0.7,
+    ("M", "S"): -1.5, ("S", "S"): 2.0, ("L", "Q"): -2.0, ("W", "E"): -6.3,
+    ("W", "A"): -5.5, ("W", "M"): -2.8, ("H", "S"): -0.5, ("W", "I"): -4.4,
+    ("S", "C"): 0.3, ("L", "A"): -1.7, ("L", "E"): -4.4, ("W", "Q"): -3.3,
+    ("H", "G"): -2.0, ("Q", "N"): 0.5, ("H", "C"): -1.5, ("L", "M"): 3.2,
+    ("W", "Y"): 1.5, ("Y", "N"): -1.2, ("E", "P"): -1.7, ("Y", "F"): 5.9,
+    ("E", "T"): -0.9, ("A", "A"): 2.5, ("I", "N"): -2.7, ("G", "A"): 0.8,
+    ("Y", "V"): -2.6, ("E", "D"): 3.9, ("W", "H"): -2.7, ("Y", "R"): -2.7,
+    ("M", "Q"): -1.7, ("P", "S"): 1.1, ("R", "H"): 1.5, ("A", "C"): -1.2,
+    ("R", "D"): -1.0, ("K", "P"): -1.6, ("L", "D"): -4.9, ("K", "T"): -0.4,
+    ("V", "N"): -2.3, ("M", "A"): -0.8, ("K", "H"): 0.8, ("V", "R"): -2.9,
+    ("P", "C"): -3.1, ("M", "E"): -3.4, ("A", "S"): 1.3, ("T", "T"): 2.5,
+    ("R", "T"): -0.7, ("I", "G"): -3.8, ("R", "P"): -1.2, ("K", "D"): 0.2,
+    ("I", "C"): -2.4, ("F", "R"): -4.3, ("F", "V"): -0.1, ("L", "C"): -2.6,
+    ("F", "F"): 7.7, ("D", "A"): -0.2, ("F", "N"): -3.5, ("W", "D"): -6.4,
+    ("L", "P"): -1.3, ("Q", "S"): -0.6, ("N", "C"): -1.9, ("N", "G"): 0.4,
+    ("H", "N"): 1.4, ("W", "T"): -4.5, ("Q", "G"): -1.4, ("W", "P"): -5.8,
+    ("Q", "C"): -3.3, ("N", "S"): 1.1, ("L", "H"): -2.1, ("L", "L"): 4.6,
+    ("G", "T"): -0.7, ("M", "M"): 4.9, ("G", "P"): -1.8, ("Y", "K"): -3.6,
+    ("Y", "G"): -4.8, ("Y", "C"): 0.6, ("E", "A"): -0.3, ("E", "E"): 4.6,
+    ("Y", "S"): -1.9, ("M", "P"): -2.0, ("V", "C"): -1.7, ("M", "T"): 0.1,
+    ("V", "G"): -2.5, ("R", "E"): -0.1, ("V", "K"): -2.7, ("K", "Q"): 2.2,
+    ("R", "A"): -1.2, ("I", "R"): -3.2, ("N", "A"): 0.0, ("V", "S"): -0.9,
+    ("M", "D"): -3.9, ("M", "H"): -2.4, ("K", "A"): -1.0, ("R", "Q"): 2.2,
+    ("K", "E"): 1.0, ("F", "S"): -2.2, ("I", "K"): -3.0, ("D", "P"): -1.8,
+    ("D", "T"): -0.7, ("I", "M"): 3.1, ("F", "C"): -0.1, ("W", "L"): -1.8,
+    ("F", "G"): -5.8, ("F", "K"): -5.1, ("F", "T"): -2.6, ("D", "D"): 4.8,
+    ("Q", "T"): -0.7, ("W", "G"): -4.5, ("Q", "P"): -0.1, ("W", "C"): 0.5,
+    ("W", "K"): -3.7, ("H", "Q"): 2.4, ("Q", "D"): 0.6, ("W", "W"): 15.7,
+    ("V", "L"): 2.0, ("L", "G"): -4.9, ("W", "S"): -3.9, ("L", "K"): -3.3,
+    ("N", "P"): -1.1, ("H", "E"): -0.2, ("N", "T"): 0.5, ("H", "A"): -1.6,
+    ("Y", "L"): -0.7, ("Y", "H"): 3.7, ("G", "S"): 0.6, ("Y", "D"): -3.0,
+    ("V", "Q"): -2.4, ("L", "T"): -1.0, ("G", "G"): 6.2, ("G", "C"): -1.7,
+    ("E", "N"): 1.2, ("Y", "T"): -3.0, ("Y", "P"): -3.5, ("R", "N"): 0.4,
+    ("V", "D"): -3.0, ("K", "R"): 3.9, ("V", "H"): -3.0, ("I", "Q"): -2.7,
+    ("V", "P"): -1.7, ("M", "C"): -2.5, ("K", "N"): 1.0, ("V", "T"): 0.4,
+    ("M", "G"): -3.8, ("T", "S"): 1.5, ("I", "E"): -3.6, ("M", "K"): -2.0,
+    ("I", "A"): -0.4, ("N", "N"): 3.3, ("R", "R"): 5.0, ("F", "P"): -3.4,
+    ("L", "I"): 2.7, ("I", "S"): -1.4, ("D", "S"): 0.1, ("L", "S"): -2.1,
+    ("I", "H"): -3.2, ("F", "D"): -5.4, ("D", "C"): -3.7, ("F", "H"): 0.3,
+    ("D", "G"): 0.7, ("F", "L"): 2.2
+}
+
+
+assert benner6 != benner22
+
+
+# http://www.embl-heidelberg.de/~vogt/matrices/benner74.cmp
+benner74 = {
+    ("W", "F"): 3.0, ("L", "R"): -2.4, ("I", "I"): 4.0, ("Q", "Q"): 3.0,
+    ("W", "N"): -4.0, ("V", "I"): 3.2, ("H", "T"): -0.5, ("H", "P"): -1.0,
+    ("W", "V"): -2.9, ("Q", "E"): 1.7, ("W", "R"): -1.6, ("Q", "A"): -0.3,
+    ("H", "H"): 6.1, ("H", "D"): 0.4, ("L", "N"): -3.1, ("Y", "M"): -0.5,
+    ("Y", "I"): -1.0, ("Y", "E"): -3.0, ("E", "S"): 0.1, ("Y", "A"): -2.6,
+    ("Y", "Y"): 8.1, ("T", "C"): -0.6, ("E", "C"): -3.2, ("Y", "Q"): -1.8,
+    ("E", "G"): -0.5, ("V", "A"): 0.1, ("C", "C"): 11.8, ("M", "R"): -1.8,
+    ("P", "T"): 0.1, ("V", "E"): -2.1, ("P", "P"): 7.5, ("I", "T"): -0.3,
+    ("K", "S"): 0.0, ("R", "G"): -1.0, ("I", "P"): -2.6, ("R", "C"): -2.2,
+    ("A", "T"): 0.7, ("K", "K"): 3.4, ("A", "P"): 0.4, ("V", "M"): 1.8,
+    ("I", "D"): -3.9, ("K", "C"): -2.9, ("K", "G"): -1.1, ("R", "S"): -0.2,
+    ("F", "Q"): -2.8, ("F", "A"): -2.6, ("V", "V"): 3.4, ("M", "N"): -2.2,
+    ("F", "E"): -4.3, ("D", "N"): 2.2, ("F", "I"): 0.9, ("F", "M"): 1.3,
+    ("M", "S"): -1.4, ("S", "S"): 2.1, ("L", "Q"): -1.7, ("W", "E"): -4.7,
+    ("W", "A"): -4.1, ("W", "M"): -1.3, ("H", "S"): -0.3, ("W", "I"): -2.3,
+    ("S", "C"): 0.1, ("L", "A"): -1.4, ("L", "E"): -3.1, ("W", "Q"): -2.8,
+    ("H", "G"): -1.6, ("Q", "N"): 0.7, ("H", "C"): -1.3, ("L", "M"): 2.9,
+    ("W", "Y"): 3.6, ("Y", "N"): -1.4, ("E", "P"): -0.7, ("Y", "F"): 5.3,
+    ("E", "T"): -0.2, ("A", "A"): 2.4, ("I", "N"): -2.8, ("G", "A"): 0.6,
+    ("Y", "V"): -1.4, ("E", "D"): 2.9, ("W", "H"): -1.0, ("Y", "R"): -2.0,
+    ("M", "Q"): -1.0, ("P", "S"): 0.5, ("R", "H"): 1.0, ("A", "C"): 0.3,
+    ("R", "D"): -0.5, ("K", "P"): -0.8, ("L", "D"): -4.2, ("K", "T"): 0.1,
+    ("V", "N"): -2.2, ("M", "A"): -0.8, ("K", "H"): 0.6, ("V", "R"): -2.2,
+    ("P", "C"): -3.1, ("M", "E"): -2.2, ("A", "S"): 1.1, ("T", "T"): 2.5,
+    ("R", "T"): -0.3, ("I", "G"): -4.3, ("R", "P"): -0.1, ("K", "D"): 0.4,
+    ("I", "C"): -1.2, ("F", "R"): -3.5, ("F", "V"): 0.1, ("L", "C"): -1.6,
+    ("F", "F"): 7.2, ("D", "A"): -0.3, ("F", "N"): -3.2, ("W", "D"): -5.5,
+    ("L", "P"): -2.2, ("Q", "S"): 0.1, ("N", "C"): -1.8, ("N", "G"): 0.4,
+    ("H", "N"): 1.2, ("W", "T"): -3.7, ("Q", "G"): -1.1, ("W", "P"): -5.2,
+    ("Q", "C"): -2.6, ("N", "S"): 0.9, ("L", "H"): -1.9, ("L", "L"): 4.2,
+    ("G", "T"): -1.0, ("M", "M"): 4.5, ("G", "P"): -1.7, ("Y", "K"): -2.4,
+    ("Y", "G"): -4.3, ("Y", "C"): -0.4, ("E", "A"): -0.1, ("E", "E"): 3.7,
+    ("Y", "S"): -1.9, ("M", "P"): -2.4, ("V", "C"): -0.2, ("M", "T"): -0.4,
+    ("V", "G"): -3.1, ("R", "E"): 0.3, ("V", "K"): -1.9, ("K", "Q"): 1.7,
+    ("R", "A"): -0.8, ("I", "R"): -2.6, ("N", "A"): -0.2, ("V", "S"): -1.0,
+    ("M", "D"): -3.2, ("M", "H"): -1.5, ("K", "A"): -0.4, ("R", "Q"): 1.6,
+    ("K", "E"): 1.2, ("F", "S"): -2.6, ("I", "K"): -2.3, ("D", "P"): -1.0,
+    ("D", "T"): -0.2, ("I", "M"): 2.6, ("F", "C"): -0.7, ("W", "L"): -0.9,
+    ("F", "G"): -5.4, ("F", "K"): -3.6, ("F", "T"): -2.2, ("D", "D"): 4.8,
+    ("Q", "T"): -0.1, ("W", "G"): -4.1, ("Q", "P"): -0.2, ("W", "C"): -0.9,
+    ("W", "K"): -3.6, ("H", "Q"): 1.4, ("Q", "D"): 0.8, ("W", "W"): 14.7,
+    ("V", "L"): 1.9, ("L", "G"): -4.6, ("W", "S"): -3.4, ("L", "K"): -2.4,
+    ("N", "P"): -1.0, ("H", "E"): 0.2, ("N", "T"): 0.4, ("H", "A"): -1.0,
+    ("Y", "L"): -0.1, ("Y", "H"): 2.5, ("G", "S"): 0.4, ("Y", "D"): -2.8,
+    ("V", "Q"): -1.7, ("L", "T"): -1.1, ("G", "G"): 6.6, ("G", "C"): -2.0,
+    ("E", "N"): 1.0, ("Y", "T"): -2.1, ("Y", "P"): -3.4, ("R", "N"): 0.3,
+    ("V", "D"): -2.9, ("K", "R"): 2.9, ("V", "H"): -2.1, ("I", "Q"): -2.0,
+    ("V", "P"): -1.9, ("M", "C"): -1.2, ("K", "N"): 0.9, ("V", "T"): 0.2,
+    ("M", "G"): -3.5, ("T", "S"): 1.4, ("I", "E"): -2.9, ("M", "K"): -1.5,
+    ("I", "A"): -0.8, ("N", "N"): 3.6, ("R", "R"): 4.8, ("F", "P"): -3.8,
+    ("L", "I"): 2.8, ("I", "S"): -1.8, ("D", "S"): 0.4, ("L", "S"): -2.2,
+    ("I", "H"): -2.3, ("F", "D"): -4.7, ("D", "C"): -3.2, ("F", "H"): 0.0,
+    ("D", "G"): 0.2, ("F", "L"): 2.1
+}
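+
+
+# The pair matrices in this module store each unordered amino-acid pair under
+# a single key orientation (e.g. benner74 has ("W", "F") but not ("F", "W")),
+# so a naive dict lookup can raise KeyError. A minimal lookup sketch follows;
+# the name `pair_score` is illustrative, not an established API:
+def pair_score(matrix, a, b):
+    """Return the substitution score for residues a, b, trying both key orders."""
+    try:
+        return matrix[(a, b)]
+    except KeyError:
+        return matrix[(b, a)]
+
+
+# Usage: pair_score(benner74, "F", "W") returns benner74[("W", "F")], i.e. 3.0.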
+
+
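+# Note: unlike the Benner matrices, the BLOSUM matrices below also include the
+# ambiguity codes B (Asn/Asp), Z (Gln/Glu) and X (any residue) among their keys.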
+# http://www.embl-heidelberg.de/~vogt/matrices/blosum100.cmp
+blosum100 = {
+    ("W", "F"): 0, ("L", "R"): -4, ("S", "P"): -2, ("V", "T"): -1,
+    ("Q", "Q"): 7, ("N", "A"): -2, ("Z", "Y"): -4, ("W", "R"): -4,
+    ("Q", "A"): -1, ("S", "D"): -1, ("H", "H"): 9, ("S", "H"): -2,
+    ("H", "D"): -2, ("L", "N"): -5, ("W", "A"): -4, ("Y", "M"): -3,
+    ("G", "R"): -4, ("Y", "I"): -3, ("Y", "E"): -4, ("B", "Y"): -4,
+    ("Y", "A"): -4, ("V", "D"): -5, ("B", "S"): -1, ("Y", "Y"): 8,
+    ("G", "N"): -2, ("E", "C"): -6, ("Y", "Q"): -3, ("Z", "Z"): 4,
+    ("V", "A"): -1, ("C", "C"): 9, ("M", "R"): -2, ("V", "E"): -3,
+    ("T", "N"): -1, ("P", "P"): 8, ("V", "I"): 2, ("V", "S"): -3,
+    ("Z", "P"): -3, ("V", "M"): 0, ("T", "F"): -3, ("V", "Q"): -3,
+    ("K", "K"): 6, ("P", "D"): -3, ("I", "H"): -5, ("I", "D"): -6,
+    ("T", "R"): -2, ("P", "L"): -4, ("K", "G"): -3, ("M", "N"): -4,
+    ("P", "H"): -3, ("F", "Q"): -4, ("Z", "G"): -4, ("X", "L"): -2,
+    ("T", "M"): -2, ("Z", "C"): -6, ("X", "H"): -2, ("D", "R"): -3,
+    ("B", "W"): -6, ("X", "D"): -3, ("Z", "K"): 0, ("F", "A"): -4,
+    ("Z", "W"): -4, ("F", "E"): -5, ("D", "N"): 1, ("B", "K"): -1,
+    ("X", "X"): -2, ("F", "I"): -1, ("B", "G"): -2, ("X", "T"): -1,
+    ("F", "M"): -1, ("B", "C"): -5, ("Z", "I"): -4, ("Z", "V"): -3,
+    ("S", "S"): 6, ("L", "Q"): -3, ("W", "E"): -5, ("Q", "R"): 0,
+    ("N", "N"): 7, ("W", "M"): -3, ("Q", "C"): -5, ("W", "I"): -4,
+    ("S", "C"): -2, ("L", "A"): -3, ("S", "G"): -1, ("L", "E"): -5,
+    ("W", "Q"): -3, ("H", "G"): -4, ("S", "K"): -1, ("Q", "N"): -1,
+    ("N", "R"): -1, ("H", "C"): -5, ("Y", "N"): -3, ("G", "Q"): -3,
+    ("Y", "F"): 3, ("C", "A"): -1, ("V", "L"): 0, ("G", "E"): -4,
+    ("G", "A"): -1, ("K", "R"): 2, ("E", "D"): 1, ("Y", "R"): -3,
+    ("M", "Q"): -1, ("T", "I"): -2, ("C", "D"): -5, ("V", "F"): -2,
+    ("T", "A"): -1, ("T", "P"): -3, ("B", "P"): -3, ("T", "E"): -2,
+    ("V", "N"): -4, ("P", "G"): -4, ("M", "A"): -2, ("K", "H"): -2,
+    ("V", "R"): -4, ("P", "C"): -5, ("M", "E"): -4, ("K", "L"): -4,
+    ("V", "V"): 5, ("M", "I"): 1, ("T", "Q"): -2, ("I", "G"): -6,
+    ("P", "K"): -2, ("M", "M"): 8, ("K", "D"): -2, ("I", "C"): -2,
+    ("Z", "D"): 0, ("F", "R"): -4, ("X", "K"): -2, ("Q", "D"): -2,
+    ("X", "G"): -3, ("Z", "L"): -4, ("X", "C"): -3, ("Z", "H"): -1,
+    ("B", "L"): -5, ("B", "H"): -1, ("F", "F"): 7, ("X", "W"): -4,
+    ("B", "D"): 4, ("D", "A"): -3, ("S", "L"): -4, ("X", "S"): -1,
+    ("F", "N"): -5, ("S", "R"): -2, ("W", "D"): -7, ("V", "Y"): -3,
+    ("W", "L"): -4, ("H", "R"): -1, ("W", "H"): -3, ("H", "N"): 0,
+    ("W", "T"): -5, ("T", "T"): 6, ("S", "F"): -3, ("W", "P"): -6,
+    ("L", "D"): -6, ("B", "I"): -5, ("L", "H"): -4, ("S", "N"): 0,
+    ("B", "T"): -2, ("L", "L"): 5, ("Y", "K"): -4, ("E", "Q"): 1,
+    ("Y", "G"): -6, ("Z", "S"): -1, ("Y", "C"): -4, ("G", "D"): -3,
+    ("B", "V"): -5, ("E", "A"): -2, ("Y", "W"): 1, ("E", "E"): 6,
+    ("Y", "S"): -3, ("C", "N"): -4, ("V", "C"): -2, ("T", "H"): -3,
+    ("P", "R"): -3, ("V", "G"): -5, ("T", "L"): -3, ("V", "K"): -4,
+    ("K", "Q"): 1, ("R", "A"): -2, ("I", "R"): -4, ("T", "D"): -2,
+    ("P", "F"): -5, ("I", "N"): -5, ("K", "I"): -4, ("M", "D"): -5,
+    ("V", "W"): -4, ("W", "W"): 11, ("M", "H"): -3, ("P", "N"): -4,
+    ("K", "A"): -2, ("M", "L"): 2, ("K", "E"): 0, ("Z", "E"): 5,
+    ("X", "N"): -2, ("Z", "A"): -2, ("Z", "M"): -3, ("X", "F"): -3,
+    ("K", "C"): -5, ("B", "Q"): -1, ("X", "B"): -2, ("B", "M"): -4,
+    ("F", "C"): -3, ("Z", "Q"): 3, ("X", "Z"): -2, ("F", "G"): -5,
+    ("B", "E"): 0, ("X", "V"): -2, ("F", "K"): -4, ("B", "A"): -3,
+    ("X", "R"): -2, ("D", "D"): 7, ("W", "G"): -5, ("Z", "F"): -5,
+    ("S", "Q"): -1, ("W", "C"): -5, ("W", "K"): -5, ("H", "Q"): 0,
+    ("L", "C"): -3, ("W", "N"): -6, ("S", "A"): 1, ("L", "G"): -5,
+    ("W", "S"): -4, ("S", "E"): -1, ("H", "E"): -1, ("S", "I"): -4,
+    ("H", "A"): -3, ("S", "M"): -3, ("Y", "L"): -3, ("Y", "H"): 1,
+    ("Y", "D"): -5, ("E", "R"): -2, ("X", "P"): -3, ("G", "G"): 6,
+    ("G", "C"): -5, ("E", "N"): -1, ("Y", "T"): -3, ("Y", "P"): -5,
+    ("T", "K"): -2, ("A", "A"): 5, ("P", "Q"): -2, ("T", "C"): -2,
+    ("V", "H"): -5, ("T", "G"): -3, ("I", "Q"): -4, ("Z", "T"): -2,
+    ("C", "R"): -5, ("V", "P"): -4, ("P", "E"): -3, ("M", "C"): -3,
+    ("K", "N"): -1, ("I", "I"): 5, ("P", "A"): -1, ("M", "G"): -5,
+    ("T", "S"): 1, ("I", "E"): -5, ("P", "M"): -4, ("M", "K"): -2,
+    ("I", "A"): -3, ("P", "I"): -4, ("R", "R"): 7, ("X", "M"): -2,
+    ("L", "I"): 1, ("X", "I"): -2, ("Z", "B"): 1, ("X", "E"): -2,
+    ("Z", "N"): -1, ("X", "A"): -1, ("B", "R"): -2, ("B", "N"): 4,
+    ("F", "D"): -5, ("X", "Y"): -3, ("Z", "R"): -1, ("F", "H"): -2,
+    ("B", "F"): -5, ("F", "L"): 0, ("X", "Q"): -2, ("B", "B"): 4
+}
+
+
+# http://www.embl-heidelberg.de/~vogt/matrices/blosum30.cmp
+blosum30 = {
+    ("W", "F"): 1, ("L", "R"): -2, ("S", "P"): -1, ("V", "T"): 1,
+    ("Q", "Q"): 8, ("N", "A"): 0, ("Z", "Y"): -2, ("W", "R"): 0,
+    ("Q", "A"): 1, ("S", "D"): 0, ("H", "H"): 14, ("S", "H"): -1,
+    ("H", "D"): -2, ("L", "N"): -2, ("W", "A"): -5, ("Y", "M"): -1,
+    ("G", "R"): -2, ("Y", "I"): -1, ("Y", "E"): -2, ("B", "Y"): -3,
+    ("Y", "A"): -4, ("V", "D"): -2, ("B", "S"): 0, ("Y", "Y"): 9,
+    ("G", "N"): 0, ("E", "C"): 1, ("Y", "Q"): -1, ("Z", "Z"): 4,
+    ("V", "A"): 1, ("C", "C"): 17, ("M", "R"): 0, ("V", "E"): -3,
+    ("T", "N"): 1, ("P", "P"): 11, ("V", "I"): 4, ("V", "S"): -1,
+    ("Z", "P"): 0, ("V", "M"): 0, ("T", "F"): -2, ("V", "Q"): -3,
+    ("K", "K"): 4, ("P", "D"): -1, ("I", "H"): -2, ("I", "D"): -4,
+    ("T", "R"): -3, ("P", "L"): -3, ("K", "G"): -1, ("M", "N"): 0,
+    ("P", "H"): 1, ("F", "Q"): -3, ("Z", "G"): -2, ("X", "L"): 0,
+    ("T", "M"): 0, ("Z", "C"): 0, ("X", "H"): -1, ("D", "R"): -1,
+    ("B", "W"): -5, ("X", "D"): -1, ("Z", "K"): 1, ("F", "A"): -2,
+    ("Z", "W"): -1, ("F", "E"): -4, ("D", "N"): 1, ("B", "K"): 0,
+    ("X", "X"): -1, ("F", "I"): 0, ("B", "G"): 0, ("X", "T"): 0,
+    ("F", "M"): -2, ("B", "C"): -2, ("Z", "I"): -3, ("Z", "V"): -3,
+    ("S", "S"): 4, ("L", "Q"): -2, ("W", "E"): -1, ("Q", "R"): 3,
+    ("N", "N"): 8, ("W", "M"): -3, ("Q", "C"): -2, ("W", "I"): -3,
+    ("S", "C"): -2, ("L", "A"): -1, ("S", "G"): 0, ("L", "E"): -1,
+    ("W", "Q"): -1, ("H", "G"): -3, ("S", "K"): 0, ("Q", "N"): -1,
+    ("N", "R"): -2, ("H", "C"): -5, ("Y", "N"): -4, ("G", "Q"): -2,
+    ("Y", "F"): 3, ("C", "A"): -3, ("V", "L"): 1, ("G", "E"): -2,
+    ("G", "A"): 0, ("K", "R"): 1, ("E", "D"): 1, ("Y", "R"): 0,
+    ("M", "Q"): -1, ("T", "I"): 0, ("C", "D"): -3, ("V", "F"): 1,
+    ("T", "A"): 1, ("T", "P"): 0, ("B", "P"): -2, ("T", "E"): -2,
+    ("V", "N"): -2, ("P", "G"): -1, ("M", "A"): 1, ("K", "H"): -2,
+    ("V", "R"): -1, ("P", "C"): -3, ("M", "E"): -1, ("K", "L"): -2,
+    ("V", "V"): 5, ("M", "I"): 1, ("T", "Q"): 0, ("I", "G"): -1,
+    ("P", "K"): 1, ("M", "M"): 6, ("K", "D"): 0, ("I", "C"): -2,
+    ("Z", "D"): 0, ("F", "R"): -1, ("X", "K"): 0, ("Q", "D"): -1,
+    ("X", "G"): -1, ("Z", "L"): -1, ("X", "C"): -2, ("Z", "H"): 0,
+    ("B", "L"): -1, ("B", "H"): -2, ("F", "F"): 10, ("X", "W"): -2,
+    ("B", "D"): 5, ("D", "A"): 0, ("S", "L"): -2, ("X", "S"): 0,
+    ("F", "N"): -1, ("S", "R"): -1, ("W", "D"): -4, ("V", "Y"): 1,
+    ("W", "L"): -2, ("H", "R"): -1, ("W", "H"): -5, ("H", "N"): -1,
+    ("W", "T"): -5, ("T", "T"): 5, ("S", "F"): -1, ("W", "P"): -3,
+    ("L", "D"): -1, ("B", "I"): -2, ("L", "H"): -1, ("S", "N"): 0,
+    ("B", "T"): 0, ("L", "L"): 4, ("Y", "K"): -1, ("E", "Q"): 2,
+    ("Y", "G"): -3, ("Z", "S"): -1, ("Y", "C"): -6, ("G", "D"): -1,
+    ("B", "V"): -2, ("E", "A"): 0, ("Y", "W"): 5, ("E", "E"): 6,
+    ("Y", "S"): -2, ("C", "N"): -1, ("V", "C"): -2, ("T", "H"): -2,
+    ("P", "R"): -1, ("V", "G"): -3, ("T", "L"): 0, ("V", "K"): -2,
+    ("K", "Q"): 0, ("R", "A"): -1, ("I", "R"): -3, ("T", "D"): -1,
+    ("P", "F"): -4, ("I", "N"): 0, ("K", "I"): -2, ("M", "D"): -3,
+    ("V", "W"): -3, ("W", "W"): 20, ("M", "H"): 2, ("P", "N"): -3,
+    ("K", "A"): 0, ("M", "L"): 2, ("K", "E"): 2, ("Z", "E"): 5,
+    ("X", "N"): 0, ("Z", "A"): 0, ("Z", "M"): -1, ("X", "F"): -1,
+    ("K", "C"): -3, ("B", "Q"): -1, ("X", "B"): -1, ("B", "M"): -2,
+    ("F", "C"): -3, ("Z", "Q"): 4, ("X", "Z"): 0, ("F", "G"): -3,
+    ("B", "E"): 0, ("X", "V"): 0, ("F", "K"): -1, ("B", "A"): 0,
+    ("X", "R"): -1, ("D", "D"): 9, ("W", "G"): 1, ("Z", "F"): -4,
+    ("S", "Q"): -1, ("W", "C"): -2, ("W", "K"): -2, ("H", "Q"): 0,
+    ("L", "C"): 0, ("W", "N"): -7, ("S", "A"): 1, ("L", "G"): -2,
+    ("W", "S"): -3, ("S", "E"): 0, ("H", "E"): 0, ("S", "I"): -1,
+    ("H", "A"): -2, ("S", "M"): -2, ("Y", "L"): 3, ("Y", "H"): 0,
+    ("Y", "D"): -1, ("E", "R"): -1, ("X", "P"): -1, ("G", "G"): 8,
+    ("G", "C"): -4, ("E", "N"): -1, ("Y", "T"): -1, ("Y", "P"): -2,
+    ("T", "K"): -1, ("A", "A"): 4, ("P", "Q"): 0, ("T", "C"): -2,
+    ("V", "H"): -3, ("T", "G"): -2, ("I", "Q"): -2, ("Z", "T"): -1,
+    ("C", "R"): -2, ("V", "P"): -4, ("P", "E"): 1, ("M", "C"): -2,
+    ("K", "N"): 0, ("I", "I"): 6, ("P", "A"): -1, ("M", "G"): -2,
+    ("T", "S"): 2, ("I", "E"): -3, ("P", "M"): -4, ("M", "K"): 2,
+    ("I", "A"): 0, ("P", "I"): -3, ("R", "R"): 8, ("X", "M"): 0,
+    ("L", "I"): 2, ("X", "I"): 0, ("Z", "B"): 0, ("X", "E"): -1,
+    ("Z", "N"): -1, ("X", "A"): 0, ("B", "R"): -2, ("B", "N"): 4,
+    ("F", "D"): -5, ("X", "Y"): -1, ("Z", "R"): 0, ("F", "H"): -3,
+    ("B", "F"): -3, ("F", "L"): 2, ("X", "Q"): 0, ("B", "B"): 5
+}
+
+
+# http://www.embl-heidelberg.de/~vogt/matrices/blosum35.cmp
+blosum35 = {
+    ("W", "F"): 1, ("L", "R"): -2, ("S", "P"): -2, ("V", "T"): 1,
+    ("Q", "Q"): 7, ("N", "A"): -1, ("Z", "Y"): -1, ("W", "R"): 0,
+    ("Q", "A"): 0, ("S", "D"): -1, ("H", "H"): 12, ("S", "H"): -1,
+    ("H", "D"): 0, ("L", "N"): -2, ("W", "A"): -2, ("Y", "M"): 0,
+    ("G", "R"): -2, ("Y", "I"): 0, ("Y", "E"): -1, ("B", "Y"): -2,
+    ("Y", "A"): -1, ("V", "D"): -2, ("B", "S"): 0, ("Y", "Y"): 8,
+    ("G", "N"): 1, ("E", "C"): -1, ("Y", "Q"): 0, ("Z", "Z"): 4,
+    ("V", "A"): 0, ("C", "C"): 15, ("M", "R"): 0, ("V", "E"): -2,
+    ("T", "N"): 0, ("P", "P"): 10, ("V", "I"): 4, ("V", "S"): -1,
+    ("Z", "P"): 0, ("V", "M"): 1, ("T", "F"): -1, ("V", "Q"): -3,
+    ("K", "K"): 5, ("P", "D"): -1, ("I", "H"): -3, ("I", "D"): -3,
+    ("T", "R"): -2, ("P", "L"): -3, ("K", "G"): -1, ("M", "N"): -1,
+    ("P", "H"): -1, ("F", "Q"): -4, ("Z", "G"): -2, ("X", "L"): 0,
+    ("T", "M"): 0, ("Z", "C"): -2, ("X", "H"): -1, ("D", "R"): -1,
+    ("B", "W"): -3, ("X", "D"): -1, ("Z", "K"): 1, ("F", "A"): -2,
+    ("Z", "W"): -1, ("F", "E"): -3, ("D", "N"): 1, ("B", "K"): 0,
+    ("X", "X"): -1, ("F", "I"): 1, ("B", "G"): 0, ("X", "T"): 0,
+    ("F", "M"): 0, ("B", "C"): -2, ("Z", "I"): -3, ("Z", "V"): -2,
+    ("S", "S"): 4, ("L", "Q"): -2, ("W", "E"): -1, ("Q", "R"): 2,
+    ("N", "N"): 7, ("W", "M"): 1, ("Q", "C"): -3, ("W", "I"): -1,
+    ("S", "C"): -3, ("L", "A"): -2, ("S", "G"): 1, ("L", "E"): -1,
+    ("W", "Q"): -1, ("H", "G"): -2, ("S", "K"): 0, ("Q", "N"): 1,
+    ("N", "R"): -1, ("H", "C"): -4, ("Y", "N"): -2, ("G", "Q"): -2,
+    ("Y", "F"): 3, ("C", "A"): -2, ("V", "L"): 2, ("G", "E"): -2,
+    ("G", "A"): 0, ("K", "R"): 2, ("E", "D"): 2, ("Y", "R"): 0,
+    ("M", "Q"): -1, ("T", "I"): -1, ("C", "D"): -3, ("V", "F"): 1,
+    ("T", "A"): 0, ("T", "P"): 0, ("B", "P"): -1, ("T", "E"): -1,
+    ("V", "N"): -2, ("P", "G"): -2, ("M", "A"): 0, ("K", "H"): -2,
+    ("V", "R"): -1, ("P", "C"): -4, ("M", "E"): -2, ("K", "L"): -2,
+    ("V", "V"): 5, ("M", "I"): 1, ("T", "Q"): 0, ("I", "G"): -3,
+    ("P", "K"): 0, ("M", "M"): 6, ("K", "D"): -1, ("I", "C"): -4,
+    ("Z", "D"): 1, ("F", "R"): -1, ("X", "K"): 0, ("Q", "D"): -1,
+    ("X", "G"): -1, ("Z", "L"): -2, ("X", "C"): -2, ("Z", "H"): -1,
+    ("B", "L"): -2, ("B", "H"): 0, ("F", "F"): 8, ("X", "W"): -1,
+    ("B", "D"): 5, ("D", "A"): -1, ("S", "L"): -2, ("X", "S"): 0,
+    ("F", "N"): -1, ("S", "R"): -1, ("W", "D"): -3, ("V", "Y"): 0,
+    ("W", "L"): 0, ("H", "R"): -1, ("W", "H"): -4, ("H", "N"): 1,
+    ("W", "T"): -2, ("T", "T"): 5, ("S", "F"): -1, ("W", "P"): -4,
+    ("L", "D"): -2, ("B", "I"): -2, ("L", "H"): -2, ("S", "N"): 0,
+    ("B", "T"): -1, ("L", "L"): 5, ("Y", "K"): -1, ("E", "Q"): 2,
+    ("Y", "G"): -2, ("Z", "S"): 0, ("Y", "C"): -5, ("G", "D"): -2,
+    ("B", "V"): -2, ("E", "A"): -1, ("Y", "W"): 3, ("E", "E"): 6,
+    ("Y", "S"): -1, ("C", "N"): -1, ("V", "C"): -2, ("T", "H"): -2,
+    ("P", "R"): -2, ("V", "G"): -3, ("T", "L"): 0, ("V", "K"): -2,
+    ("K", "Q"): 0, ("R", "A"): -1, ("I", "R"): -3, ("T", "D"): -1,
+    ("P", "F"): -4, ("I", "N"): -1, ("K", "I"): -2, ("M", "D"): -3,
+    ("V", "W"): -2, ("W", "W"): 16, ("M", "H"): 1, ("P", "N"): -2,
+    ("K", "A"): 0, ("M", "L"): 3, ("K", "E"): 1, ("Z", "E"): 5,
+    ("X", "N"): 0, ("Z", "A"): -1, ("Z", "M"): -2, ("X", "F"): -1,
+    ("K", "C"): -2, ("B", "Q"): 0, ("X", "B"): -1, ("B", "M"): -2,
+    ("F", "C"): -4, ("Z", "Q"): 4, ("X", "Z"): 0, ("F", "G"): -3,
+    ("B", "E"): 0, ("X", "V"): 0, ("F", "K"): -1, ("B", "A"): -1,
+    ("X", "R"): -1, ("D", "D"): 8, ("W", "G"): -1, ("Z", "F"): -3,
+    ("S", "Q"): 0, ("W", "C"): -5, ("W", "K"): 0, ("H", "Q"): -1,
+    ("L", "C"): -2, ("W", "N"): -2, ("S", "A"): 1, ("L", "G"): -3,
+    ("W", "S"): -2, ("S", "E"): 0, ("H", "E"): -1, ("S", "I"): -2,
+    ("H", "A"): -2, ("S", "M"): -1, ("Y", "L"): 0, ("Y", "H"): 0,
+    ("Y", "D"): -2, ("E", "R"): -1, ("X", "P"): -1, ("G", "G"): 7,
+    ("G", "C"): -3, ("E", "N"): -1, ("Y", "T"): -2, ("Y", "P"): -3,
+    ("T", "K"): 0, ("A", "A"): 5, ("P", "Q"): 0, ("T", "C"): -1,
+    ("V", "H"): -4, ("T", "G"): -2, ("I", "Q"): -2, ("Z", "T"): -1,
+    ("C", "R"): -3, ("V", "P"): -3, ("P", "E"): 0, ("M", "C"): -4,
+    ("K", "N"): 0, ("I", "I"): 5, ("P", "A"): -2, ("M", "G"): -1,
+    ("T", "S"): 2, ("I", "E"): -3, ("P", "M"): -3, ("M", "K"): 0,
+    ("I", "A"): -1, ("P", "I"): -1, ("R", "R"): 8, ("X", "M"): 0,
+    ("L", "I"): 2, ("X", "I"): 0, ("Z", "B"): 0, ("X", "E"): -1,
+    ("Z", "N"): 0, ("X", "A"): 0, ("B", "R"): -1, ("B", "N"): 4,
+    ("F", "D"): -3, ("X", "Y"): -1, ("Z", "R"): 0, ("F", "H"): -3,
+    ("B", "F"): -2, ("F", "L"): 2, ("X", "Q"): -1, ("B", "B"): 5
+}
+
+
+# http://www.embl-heidelberg.de/~vogt/matrices/blosum40.cmp
+blosum40 = {
+    ("W", "F"): 1, ("L", "R"): -2, ("S", "P"): -1, ("V", "T"): 1,
+    ("Q", "Q"): 8, ("N", "A"): -1, ("Z", "Y"): -2, ("W", "R"): -2,
+    ("Q", "A"): 0, ("S", "D"): 0, ("H", "H"): 13, ("S", "H"): -1,
+    ("H", "D"): 0, ("L", "N"): -3, ("W", "A"): -3, ("Y", "M"): 1,
+    ("G", "R"): -3, ("Y", "I"): 0, ("Y", "E"): -2, ("B", "Y"): -3,
+    ("Y", "A"): -2, ("V", "D"): -3, ("B", "S"): 0, ("Y", "Y"): 9,
+    ("G", "N"): 0, ("E", "C"): -2, ("Y", "Q"): -1, ("Z", "Z"): 5,
+    ("V", "A"): 0, ("C", "C"): 16, ("M", "R"): -1, ("V", "E"): -3,
+    ("T", "N"): 0, ("P", "P"): 11, ("V", "I"): 4, ("V", "S"): -1,
+    ("Z", "P"): -1, ("V", "M"): 1, ("T", "F"): -1, ("V", "Q"): -3,
+    ("K", "K"): 6, ("P", "D"): -2, ("I", "H"): -3, ("I", "D"): -4,
+    ("T", "R"): -2, ("P", "L"): -4, ("K", "G"): -2, ("M", "N"): -2,
+    ("P", "H"): -2, ("F", "Q"): -4, ("Z", "G"): -2, ("X", "L"): -1,
+    ("T", "M"): -1, ("Z", "C"): -3, ("X", "H"): -1, ("D", "R"): -1,
+    ("B", "W"): -4, ("X", "D"): -1, ("Z", "K"): 1, ("F", "A"): -3,
+    ("Z", "W"): -2, ("F", "E"): -3, ("D", "N"): 2, ("B", "K"): 0,
+    ("X", "X"): -1, ("F", "I"): 1, ("B", "G"): -1, ("X", "T"): 0,
+    ("F", "M"): 0, ("B", "C"): -2, ("Z", "I"): -4, ("Z", "V"): -3,
+    ("S", "S"): 5, ("L", "Q"): -2, ("W", "E"): -2, ("Q", "R"): 2,
+    ("N", "N"): 8, ("W", "M"): -2, ("Q", "C"): -4, ("W", "I"): -3,
+    ("S", "C"): -1, ("L", "A"): -2, ("S", "G"): 0, ("L", "E"): -2,
+    ("W", "Q"): -1, ("H", "G"): -2, ("S", "K"): 0, ("Q", "N"): 1,
+    ("N", "R"): 0, ("H", "C"): -4, ("Y", "N"): -2, ("G", "Q"): -2,
+    ("Y", "F"): 4, ("C", "A"): -2, ("V", "L"): 2, ("G", "E"): -3,
+    ("G", "A"): 1, ("K", "R"): 3, ("E", "D"): 2, ("Y", "R"): -1,
+    ("M", "Q"): -1, ("T", "I"): -1, ("C", "D"): -2, ("V", "F"): 0,
+    ("T", "A"): 0, ("T", "P"): 0, ("B", "P"): -2, ("T", "E"): -1,
+    ("V", "N"): -3, ("P", "G"): -1, ("M", "A"): -1, ("K", "H"): -1,
+    ("V", "R"): -2, ("P", "C"): -5, ("M", "E"): -2, ("K", "L"): -2,
+    ("V", "V"): 5, ("M", "I"): 1, ("T", "Q"): -1, ("I", "G"): -4,
+    ("P", "K"): -1, ("M", "M"): 7, ("K", "D"): 0, ("I", "C"): -4,
+    ("Z", "D"): 1, ("F", "R"): -2, ("X", "K"): -1, ("Q", "D"): -1,
+    ("X", "G"): -1, ("Z", "L"): -2, ("X", "C"): -2, ("Z", "H"): 0,
+    ("B", "L"): -3, ("B", "H"): 0, ("F", "F"): 9, ("X", "W"): -2,
+    ("B", "D"): 6, ("D", "A"): -1, ("S", "L"): -3, ("X", "S"): 0,
+    ("F", "N"): -3, ("S", "R"): -1, ("W", "D"): -5, ("V", "Y"): -1,
+    ("W", "L"): -1, ("H", "R"): 0, ("W", "H"): -5, ("H", "N"): 1,
+    ("W", "T"): -4, ("T", "T"): 6, ("S", "F"): -2, ("W", "P"): -4,
+    ("L", "D"): -3, ("B", "I"): -3, ("L", "H"): -2, ("S", "N"): 1,
+    ("B", "T"): 0, ("L", "L"): 6, ("Y", "K"): -1, ("E", "Q"): 2,
+    ("Y", "G"): -3, ("Z", "S"): 0, ("Y", "C"): -4, ("G", "D"): -2,
+    ("B", "V"): -3, ("E", "A"): -1, ("Y", "W"): 3, ("E", "E"): 7,
+    ("Y", "S"): -2, ("C", "N"): -2, ("V", "C"): -2, ("T", "H"): -2,
+    ("P", "R"): -3, ("V", "G"): -4, ("T", "L"): -1, ("V", "K"): -2,
+    ("K", "Q"): 1, ("R", "A"): -2, ("I", "R"): -3, ("T", "D"): -1,
+    ("P", "F"): -4, ("I", "N"): -2, ("K", "I"): -3, ("M", "D"): -3,
+    ("V", "W"): -3, ("W", "W"): 19, ("M", "H"): 1, ("P", "N"): -2,
+    ("K", "A"): -1, ("M", "L"): 3, ("K", "E"): 1, ("Z", "E"): 5,
+    ("X", "N"): -1, ("Z", "A"): -1, ("Z", "M"): -2, ("X", "F"): -1,
+    ("K", "C"): -3, ("B", "Q"): 0, ("X", "B"): -1, ("B", "M"): -3,
+    ("F", "C"): -2, ("Z", "Q"): 4, ("X", "Z"): -1, ("F", "G"): -3,
+    ("B", "E"): 1, ("X", "V"): -1, ("F", "K"): -3, ("B", "A"): -1,
+    ("X", "R"): -1, ("D", "D"): 9, ("W", "G"): -2, ("Z", "F"): -4,
+    ("S", "Q"): 1, ("W", "C"): -6, ("W", "K"): -2, ("H", "Q"): 0,
+    ("L", "C"): -2, ("W", "N"): -4, ("S", "A"): 1, ("L", "G"): -4,
+    ("W", "S"): -5, ("S", "E"): 0, ("H", "E"): 0, ("S", "I"): -2,
+    ("H", "A"): -2, ("S", "M"): -2, ("Y", "L"): 0, ("Y", "H"): 2,
+    ("Y", "D"): -3, ("E", "R"): -1, ("X", "P"): -2, ("G", "G"): 8,
+    ("G", "C"): -3, ("E", "N"): -1, ("Y", "T"): -1, ("Y", "P"): -3,
+    ("T", "K"): 0, ("A", "A"): 5, ("P", "Q"): -2, ("T", "C"): -1,
+    ("V", "H"): -4, ("T", "G"): -2, ("I", "Q"): -3, ("Z", "T"): -1,
+    ("C", "R"): -3, ("V", "P"): -3, ("P", "E"): 0, ("M", "C"): -3,
+    ("K", "N"): 0, ("I", "I"): 6, ("P", "A"): -2, ("M", "G"): -2,
+    ("T", "S"): 2, ("I", "E"): -4, ("P", "M"): -2, ("M", "K"): -1,
+    ("I", "A"): -1, ("P", "I"): -2, ("R", "R"): 9, ("X", "M"): 0,
+    ("L", "I"): 2, ("X", "I"): -1, ("Z", "B"): 2, ("X", "E"): -1,
+    ("Z", "N"): 0, ("X", "A"): 0, ("B", "R"): -1, ("B", "N"): 4,
+    ("F", "D"): -4, ("X", "Y"): -1, ("Z", "R"): 0, ("F", "H"): -2,
+    ("B", "F"): -3, ("F", "L"): 2, ("X", "Q"): -1, ("B", "B"): 5
+}
+
+
+# http://www.embl-heidelberg.de/~vogt/matrices/blosum45.cmp
+blosum45 = {
+    ("W", "F"): 1, ("L", "R"): -2, ("S", "P"): -1, ("V", "T"): 0,
+    ("Q", "Q"): 6, ("N", "A"): -1, ("Z", "Y"): -2, ("W", "R"): -2,
+    ("Q", "A"): -1, ("S", "D"): 0, ("H", "H"): 10, ("S", "H"): -1,
+    ("H", "D"): 0, ("L", "N"): -3, ("W", "A"): -2, ("Y", "M"): 0,
+    ("G", "R"): -2, ("Y", "I"): 0, ("Y", "E"): -2, ("B", "Y"): -2,
+    ("Y", "A"): -2, ("V", "D"): -3, ("B", "S"): 0, ("Y", "Y"): 8,
+    ("G", "N"): 0, ("E", "C"): -3, ("Y", "Q"): -1, ("Z", "Z"): 4,
+    ("V", "A"): 0, ("C", "C"): 12, ("M", "R"): -1, ("V", "E"): -3,
+    ("T", "N"): 0, ("P", "P"): 9, ("V", "I"): 3, ("V", "S"): -1,
+    ("Z", "P"): -1, ("V", "M"): 1, ("T", "F"): -1, ("V", "Q"): -3,
+    ("K", "K"): 5, ("P", "D"): -1, ("I", "H"): -3, ("I", "D"): -4,
+    ("T", "R"): -1, ("P", "L"): -3, ("K", "G"): -2, ("M", "N"): -2,
+    ("P", "H"): -2, ("F", "Q"): -4, ("Z", "G"): -2, ("X", "L"): -1,
+    ("T", "M"): -1, ("Z", "C"): -3, ("X", "H"): -1, ("D", "R"): -1,
+    ("B", "W"): -4, ("X", "D"): -1, ("Z", "K"): 1, ("F", "A"): -2,
+    ("Z", "W"): -2, ("F", "E"): -3, ("D", "N"): 2, ("B", "K"): 0,
+    ("X", "X"): -1, ("F", "I"): 0, ("B", "G"): -1, ("X", "T"): 0,
+    ("F", "M"): 0, ("B", "C"): -2, ("Z", "I"): -3, ("Z", "V"): -3,
+    ("S", "S"): 4, ("L", "Q"): -2, ("W", "E"): -3, ("Q", "R"): 1,
+    ("N", "N"): 6, ("W", "M"): -2, ("Q", "C"): -3, ("W", "I"): -2,
+    ("S", "C"): -1, ("L", "A"): -1, ("S", "G"): 0, ("L", "E"): -2,
+    ("W", "Q"): -2, ("H", "G"): -2, ("S", "K"): -1, ("Q", "N"): 0,
+    ("N", "R"): 0, ("H", "C"): -3, ("Y", "N"): -2, ("G", "Q"): -2,
+    ("Y", "F"): 3, ("C", "A"): -1, ("V", "L"): 1, ("G", "E"): -2,
+    ("G", "A"): 0, ("K", "R"): 3, ("E", "D"): 2, ("Y", "R"): -1,
+    ("M", "Q"): 0, ("T", "I"): -1, ("C", "D"): -3, ("V", "F"): 0,
+    ("T", "A"): 0, ("T", "P"): -1, ("B", "P"): -2, ("T", "E"): -1,
+    ("V", "N"): -3, ("P", "G"): -2, ("M", "A"): -1, ("K", "H"): -1,
+    ("V", "R"): -2, ("P", "C"): -4, ("M", "E"): -2, ("K", "L"): -3,
+    ("V", "V"): 5, ("M", "I"): 2, ("T", "Q"): -1, ("I", "G"): -4,
+    ("P", "K"): -1, ("M", "M"): 6, ("K", "D"): 0, ("I", "C"): -3,
+    ("Z", "D"): 1, ("F", "R"): -2, ("X", "K"): -1, ("Q", "D"): 0,
+    ("X", "G"): -1, ("Z", "L"): -2, ("X", "C"): -2, ("Z", "H"): 0,
+    ("B", "L"): -3, ("B", "H"): 0, ("F", "F"): 8, ("X", "W"): -2,
+    ("B", "D"): 5, ("D", "A"): -2, ("S", "L"): -3, ("X", "S"): 0,
+    ("F", "N"): -2, ("S", "R"): -1, ("W", "D"): -4, ("V", "Y"): -1,
+    ("W", "L"): -2, ("H", "R"): 0, ("W", "H"): -3, ("H", "N"): 1,
+    ("W", "T"): -3, ("T", "T"): 5, ("S", "F"): -2, ("W", "P"): -3,
+    ("L", "D"): -3, ("B", "I"): -3, ("L", "H"): -2, ("S", "N"): 1,
+    ("B", "T"): 0, ("L", "L"): 5, ("Y", "K"): -1, ("E", "Q"): 2,
+    ("Y", "G"): -3, ("Z", "S"): 0, ("Y", "C"): -3, ("G", "D"): -1,
+    ("B", "V"): -3, ("E", "A"): -1, ("Y", "W"): 3, ("E", "E"): 6,
+    ("Y", "S"): -2, ("C", "N"): -2, ("V", "C"): -1, ("T", "H"): -2,
+    ("P", "R"): -2, ("V", "G"): -3, ("T", "L"): -1, ("V", "K"): -2,
+    ("K", "Q"): 1, ("R", "A"): -2, ("I", "R"): -3, ("T", "D"): -1,
+    ("P", "F"): -3, ("I", "N"): -2, ("K", "I"): -3, ("M", "D"): -3,
+    ("V", "W"): -3, ("W", "W"): 15, ("M", "H"): 0, ("P", "N"): -2,
+    ("K", "A"): -1, ("M", "L"): 2, ("K", "E"): 1, ("Z", "E"): 4,
+    ("X", "N"): -1, ("Z", "A"): -1, ("Z", "M"): -1, ("X", "F"): -1,
+    ("K", "C"): -3, ("B", "Q"): 0, ("X", "B"): -1, ("B", "M"): -2,
+    ("F", "C"): -2, ("Z", "Q"): 4, ("X", "Z"): -1, ("F", "G"): -3,
+    ("B", "E"): 1, ("X", "V"): -1, ("F", "K"): -3, ("B", "A"): -1,
+    ("X", "R"): -1, ("D", "D"): 7, ("W", "G"): -2, ("Z", "F"): -3,
+    ("S", "Q"): 0, ("W", "C"): -5, ("W", "K"): -2, ("H", "Q"): 1,
+    ("L", "C"): -2, ("W", "N"): -4, ("S", "A"): 1, ("L", "G"): -3,
+    ("W", "S"): -4, ("S", "E"): 0, ("H", "E"): 0, ("S", "I"): -2,
+    ("H", "A"): -2, ("S", "M"): -2, ("Y", "L"): 0, ("Y", "H"): 2,
+    ("Y", "D"): -2, ("E", "R"): 0, ("X", "P"): -1, ("G", "G"): 7,
+    ("G", "C"): -3, ("E", "N"): 0, ("Y", "T"): -1, ("Y", "P"): -3,
+    ("T", "K"): -1, ("A", "A"): 5, ("P", "Q"): -1, ("T", "C"): -1,
+    ("V", "H"): -3, ("T", "G"): -2, ("I", "Q"): -2, ("Z", "T"): -1,
+    ("C", "R"): -3, ("V", "P"): -3, ("P", "E"): 0, ("M", "C"): -2,
+    ("K", "N"): 0, ("I", "I"): 5, ("P", "A"): -1, ("M", "G"): -2,
+    ("T", "S"): 2, ("I", "E"): -3, ("P", "M"): -2, ("M", "K"): -1,
+    ("I", "A"): -1, ("P", "I"): -2, ("R", "R"): 7, ("X", "M"): -1,
+    ("L", "I"): 2, ("X", "I"): -1, ("Z", "B"): 2, ("X", "E"): -1,
+    ("Z", "N"): 0, ("X", "A"): 0, ("B", "R"): -1, ("B", "N"): 4,
+    ("F", "D"): -4, ("X", "Y"): -1, ("Z", "R"): 0, ("F", "H"): -2,
+    ("B", "F"): -3, ("F", "L"): 1, ("X", "Q"): -1, ("B", "B"): 4
+}
+
+
+# http://www.embl-heidelberg.de/~vogt/matrices/blosum50.cmp
+blosum50 = {
+    ("W", "F"): 1, ("L", "R"): -3, ("S", "P"): -1, ("V", "T"): 0,
+    ("Q", "Q"): 7, ("N", "A"): -1, ("Z", "Y"): -2, ("W", "R"): -3,
+    ("Q", "A"): -1, ("S", "D"): 0, ("H", "H"): 10, ("S", "H"): -1,
+    ("H", "D"): -1, ("L", "N"): -4, ("W", "A"): -3, ("Y", "M"): 0,
+    ("G", "R"): -3, ("Y", "I"): -1, ("Y", "E"): -2, ("B", "Y"): -3,
+    ("Y", "A"): -2, ("V", "D"): -4, ("B", "S"): 0, ("Y", "Y"): 8,
+    ("G", "N"): 0, ("E", "C"): -3, ("Y", "Q"): -1, ("Z", "Z"): 5,
+    ("V", "A"): 0, ("C", "C"): 13, ("M", "R"): -2, ("V", "E"): -3,
+    ("T", "N"): 0, ("P", "P"): 10, ("V", "I"): 4, ("V", "S"): -2,
+    ("Z", "P"): -1, ("V", "M"): 1, ("T", "F"): -2, ("V", "Q"): -3,
+    ("K", "K"): 6, ("P", "D"): -1, ("I", "H"): -4, ("I", "D"): -4,
+    ("T", "R"): -1, ("P", "L"): -4, ("K", "G"): -2, ("M", "N"): -2,
+    ("P", "H"): -2, ("F", "Q"): -4, ("Z", "G"): -2, ("X", "L"): -1,
+    ("T", "M"): -1, ("Z", "C"): -3, ("X", "H"): -1, ("D", "R"): -2,
+    ("B", "W"): -5, ("X", "D"): -1, ("Z", "K"): 1, ("F", "A"): -3,
+    ("Z", "W"): -2, ("F", "E"): -3, ("D", "N"): 2, ("B", "K"): 0,
+    ("X", "X"): -1, ("F", "I"): 0, ("B", "G"): -1, ("X", "T"): 0,
+    ("F", "M"): 0, ("B", "C"): -3, ("Z", "I"): -3, ("Z", "V"): -3,
+    ("S", "S"): 5, ("L", "Q"): -2, ("W", "E"): -3, ("Q", "R"): 1,
+    ("N", "N"): 7, ("W", "M"): -1, ("Q", "C"): -3, ("W", "I"): -3,
+    ("S", "C"): -1, ("L", "A"): -2, ("S", "G"): 0, ("L", "E"): -3,
+    ("W", "Q"): -1, ("H", "G"): -2, ("S", "K"): 0, ("Q", "N"): 0,
+    ("N", "R"): -1, ("H", "C"): -3, ("Y", "N"): -2, ("G", "Q"): -2,
+    ("Y", "F"): 4, ("C", "A"): -1, ("V", "L"): 1, ("G", "E"): -3,
+    ("G", "A"): 0, ("K", "R"): 3, ("E", "D"): 2, ("Y", "R"): -1,
+    ("M", "Q"): 0, ("T", "I"): -1, ("C", "D"): -4, ("V", "F"): -1,
+    ("T", "A"): 0, ("T", "P"): -1, ("B", "P"): -2, ("T", "E"): -1,
+    ("V", "N"): -3, ("P", "G"): -2, ("M", "A"): -1, ("K", "H"): 0,
+    ("V", "R"): -3, ("P", "C"): -4, ("M", "E"): -2, ("K", "L"): -3,
+    ("V", "V"): 5, ("M", "I"): 2, ("T", "Q"): -1, ("I", "G"): -4,
+    ("P", "K"): -1, ("M", "M"): 7, ("K", "D"): -1, ("I", "C"): -2,
+    ("Z", "D"): 1, ("F", "R"): -3, ("X", "K"): -1, ("Q", "D"): 0,
+    ("X", "G"): -2, ("Z", "L"): -3, ("X", "C"): -2, ("Z", "H"): 0,
+    ("B", "L"): -4, ("B", "H"): 0, ("F", "F"): 8, ("X", "W"): -3,
+    ("B", "D"): 5, ("D", "A"): -2, ("S", "L"): -3, ("X", "S"): -1,
+    ("F", "N"): -4, ("S", "R"): -1, ("W", "D"): -5, ("V", "Y"): -1,
+    ("W", "L"): -2, ("H", "R"): 0, ("W", "H"): -3, ("H", "N"): 1,
+    ("W", "T"): -3, ("T", "T"): 5, ("S", "F"): -3, ("W", "P"): -4,
+    ("L", "D"): -4, ("B", "I"): -4, ("L", "H"): -3, ("S", "N"): 1,
+    ("B", "T"): 0, ("L", "L"): 5, ("Y", "K"): -2, ("E", "Q"): 2,
+    ("Y", "G"): -3, ("Z", "S"): 0, ("Y", "C"): -3, ("G", "D"): -1,
+    ("B", "V"): -4, ("E", "A"): -1, ("Y", "W"): 2, ("E", "E"): 6,
+    ("Y", "S"): -2, ("C", "N"): -2, ("V", "C"): -1, ("T", "H"): -2,
+    ("P", "R"): -3, ("V", "G"): -4, ("T", "L"): -1, ("V", "K"): -3,
+    ("K", "Q"): 2, ("R", "A"): -2, ("I", "R"): -4, ("T", "D"): -1,
+    ("P", "F"): -4, ("I", "N"): -3, ("K", "I"): -3, ("M", "D"): -4,
+    ("V", "W"): -3, ("W", "W"): 15, ("M", "H"): -1, ("P", "N"): -2,
+    ("K", "A"): -1, ("M", "L"): 3, ("K", "E"): 1, ("Z", "E"): 5,
+    ("X", "N"): -1, ("Z", "A"): -1, ("Z", "M"): -1, ("X", "F"): -2,
+    ("K", "C"): -3, ("B", "Q"): 0, ("X", "B"): -1, ("B", "M"): -3,
+    ("F", "C"): -2, ("Z", "Q"): 4, ("X", "Z"): -1, ("F", "G"): -4,
+    ("B", "E"): 1, ("X", "V"): -1, ("F", "K"): -4, ("B", "A"): -2,
+    ("X", "R"): -1, ("D", "D"): 8, ("W", "G"): -3, ("Z", "F"): -4,
+    ("S", "Q"): 0, ("W", "C"): -5, ("W", "K"): -3, ("H", "Q"): 1,
+    ("L", "C"): -2, ("W", "N"): -4, ("S", "A"): 1, ("L", "G"): -4,
+    ("W", "S"): -4, ("S", "E"): -1, ("H", "E"): 0, ("S", "I"): -3,
+    ("H", "A"): -2, ("S", "M"): -2, ("Y", "L"): -1, ("Y", "H"): 2,
+    ("Y", "D"): -3, ("E", "R"): 0, ("X", "P"): -2, ("G", "G"): 8,
+    ("G", "C"): -3, ("E", "N"): 0, ("Y", "T"): -2, ("Y", "P"): -3,
+    ("T", "K"): -1, ("A", "A"): 5, ("P", "Q"): -1, ("T", "C"): -1,
+    ("V", "H"): -4, ("T", "G"): -2, ("I", "Q"): -3, ("Z", "T"): -1,
+    ("C", "R"): -4, ("V", "P"): -3, ("P", "E"): -1, ("M", "C"): -2,
+    ("K", "N"): 0, ("I", "I"): 5, ("P", "A"): -1, ("M", "G"): -3,
+    ("T", "S"): 2, ("I", "E"): -4, ("P", "M"): -3, ("M", "K"): -2,
+    ("I", "A"): -1, ("P", "I"): -3, ("R", "R"): 7, ("X", "M"): -1,
+    ("L", "I"): 2, ("X", "I"): -1, ("Z", "B"): 2, ("X", "E"): -1,
+    ("Z", "N"): 0, ("X", "A"): -1, ("B", "R"): -1, ("B", "N"): 4,
+    ("F", "D"): -5, ("X", "Y"): -1, ("Z", "R"): 0, ("F", "H"): -1,
+    ("B", "F"): -4, ("F", "L"): 1, ("X", "Q"): -1, ("B", "B"): 5
+}
+
+
+# http://www.embl-heidelberg.de/~vogt/matrices/blosum55.cmp
+blosum55 = {
+    ("W", "F"): 1, ("L", "R"): -3, ("S", "P"): -1, ("V", "T"): 0,
+    ("Q", "Q"): 7, ("N", "A"): -1, ("Z", "Y"): -2, ("W", "R"): -3,
+    ("Q", "A"): -1, ("S", "D"): 0, ("H", "H"): 10, ("S", "H"): -1,
+    ("H", "D"): -1, ("L", "N"): -4, ("W", "A"): -3, ("Y", "M"): 0,
+    ("G", "R"): -3, ("Y", "I"): -1, ("Y", "E"): -2, ("B", "Y"): -3,
+    ("Y", "A"): -2, ("V", "D"): -4, ("B", "S"): 0, ("Y", "Y"): 8,
+    ("G", "N"): 0, ("E", "C"): -3, ("Y", "Q"): -1, ("Z", "Z"): 5,
+    ("V", "A"): 0, ("C", "C"): 13, ("M", "R"): -2, ("V", "E"): -3,
+    ("T", "N"): 0, ("P", "P"): 10, ("V", "I"): 4, ("V", "S"): -2,
+    ("Z", "P"): -1, ("V", "M"): 1, ("T", "F"): -2, ("V", "Q"): -3,
+    ("K", "K"): 6, ("P", "D"): -1, ("I", "H"): -4, ("I", "D"): -4,
+    ("T", "R"): -1, ("P", "L"): -4, ("K", "G"): -2, ("M", "N"): -2,
+    ("P", "H"): -2, ("F", "Q"): -4, ("Z", "G"): -2, ("X", "L"): -1,
+    ("T", "M"): -1, ("Z", "C"): -3, ("X", "H"): -1, ("D", "R"): -2,
+    ("B", "W"): -5, ("X", "D"): -1, ("Z", "K"): 1, ("F", "A"): -3,
+    ("Z", "W"): -2, ("F", "E"): -3, ("D", "N"): 2, ("B", "K"): 0,
+    ("X", "X"): -1, ("F", "I"): 0, ("B", "G"): -1, ("X", "T"): 0,
+    ("F", "M"): 0, ("B", "C"): -3, ("Z", "I"): -3, ("Z", "V"): -3,
+    ("S", "S"): 5, ("L", "Q"): -2, ("W", "E"): -3, ("Q", "R"): 1,
+    ("N", "N"): 7, ("W", "M"): -1, ("Q", "C"): -3, ("W", "I"): -3,
+    ("S", "C"): -1, ("L", "A"): -2, ("S", "G"): 0, ("L", "E"): -3,
+    ("W", "Q"): -1, ("H", "G"): -2, ("S", "K"): 0, ("Q", "N"): 0,
+    ("N", "R"): -1, ("H", "C"): -3, ("Y", "N"): -2, ("G", "Q"): -2,
+    ("Y", "F"): 4, ("C", "A"): -1, ("V", "L"): 1, ("G", "E"): -3,
+    ("G", "A"): 0, ("K", "R"): 3, ("E", "D"): 2, ("Y", "R"): -1,
+    ("M", "Q"): 0, ("T", "I"): -1, ("C", "D"): -4, ("V", "F"): -1,
+    ("T", "A"): 0, ("T", "P"): -1, ("B", "P"): -2, ("T", "E"): -1,
+    ("V", "N"): -3, ("P", "G"): -2, ("M", "A"): -1, ("K", "H"): 0,
+    ("V", "R"): -3, ("P", "C"): -4, ("M", "E"): -2, ("K", "L"): -3,
+    ("V", "V"): 5, ("M", "I"): 2, ("T", "Q"): -1, ("I", "G"): -4,
+    ("P", "K"): -1, ("M", "M"): 7, ("K", "D"): -1, ("I", "C"): -2,
+    ("Z", "D"): 1, ("F", "R"): -3, ("X", "K"): -1, ("Q", "D"): 0,
+    ("X", "G"): -2, ("Z", "L"): -3, ("X", "C"): -2, ("Z", "H"): 0,
+    ("B", "L"): -4, ("B", "H"): 0, ("F", "F"): 8, ("X", "W"): -3,
+    ("B", "D"): 5, ("D", "A"): -2, ("S", "L"): -3, ("X", "S"): -1,
+    ("F", "N"): -4, ("S", "R"): -1, ("W", "D"): -5, ("V", "Y"): -1,
+    ("W", "L"): -2, ("H", "R"): 0, ("W", "H"): -3, ("H", "N"): 1,
+    ("W", "T"): -3, ("T", "T"): 5, ("S", "F"): -3, ("W", "P"): -4,
+    ("L", "D"): -4, ("B", "I"): -4, ("L", "H"): -3, ("S", "N"): 1,
+    ("B", "T"): 0, ("L", "L"): 5, ("Y", "K"): -2, ("E", "Q"): 2,
+    ("Y", "G"): -3, ("Z", "S"): 0, ("Y", "C"): -3, ("G", "D"): -1,
+    ("B", "V"): -4, ("E", "A"): -1, ("Y", "W"): 2, ("E", "E"): 6,
+    ("Y", "S"): -2, ("C", "N"): -2, ("V", "C"): -1, ("T", "H"): -2,
+    ("P", "R"): -3, ("V", "G"): -4, ("T", "L"): -1, ("V", "K"): -3,
+    ("K", "Q"): 2, ("R", "A"): -2, ("I", "R"): -4, ("T", "D"): -1,
+    ("P", "F"): -4, ("I", "N"): -3, ("K", "I"): -3, ("M", "D"): -4,
+    ("V", "W"): -3, ("W", "W"): 15, ("M", "H"): -1, ("P", "N"): -2,
+    ("K", "A"): -1, ("M", "L"): 3, ("K", "E"): 1, ("Z", "E"): 5,
+    ("X", "N"): -1, ("Z", "A"): -1, ("Z", "M"): -1, ("X", "F"): -2,
+    ("K", "C"): -3, ("B", "Q"): 0, ("X", "B"): -1, ("B", "M"): -3,
+    ("F", "C"): -2, ("Z", "Q"): 4, ("X", "Z"): -1, ("F", "G"): -4,
+    ("B", "E"): 1, ("X", "V"): -1, ("F", "K"): -4, ("B", "A"): -2,
+    ("X", "R"): -1, ("D", "D"): 8, ("W", "G"): -3, ("Z", "F"): -4,
+    ("S", "Q"): 0, ("W", "C"): -5, ("W", "K"): -3, ("H", "Q"): 1,
+    ("L", "C"): -2, ("W", "N"): -4, ("S", "A"): 1, ("L", "G"): -4,
+    ("W", "S"): -4, ("S", "E"): -1, ("H", "E"): 0, ("S", "I"): -3,
+    ("H", "A"): -2, ("S", "M"): -2, ("Y", "L"): -1, ("Y", "H"): 2,
+    ("Y", "D"): -3, ("E", "R"): 0, ("X", "P"): -2, ("G", "G"): 8,
+    ("G", "C"): -3, ("E", "N"): 0, ("Y", "T"): -2, ("Y", "P"): -3,
+    ("T", "K"): -1, ("A", "A"): 5, ("P", "Q"): -1, ("T", "C"): -1,
+    ("V", "H"): -4, ("T", "G"): -2, ("I", "Q"): -3, ("Z", "T"): -1,
+    ("C", "R"): -4, ("V", "P"): -3, ("P", "E"): -1, ("M", "C"): -2,
+    ("K", "N"): 0, ("I", "I"): 5, ("P", "A"): -1, ("M", "G"): -3,
+    ("T", "S"): 2, ("I", "E"): -4, ("P", "M"): -3, ("M", "K"): -2,
+    ("I", "A"): -1, ("P", "I"): -3, ("R", "R"): 7, ("X", "M"): -1,
+    ("L", "I"): 2, ("X", "I"): -1, ("Z", "B"): 2, ("X", "E"): -1,
+    ("Z", "N"): 0, ("X", "A"): -1, ("B", "R"): -1, ("B", "N"): 4,
+    ("F", "D"): -5, ("X", "Y"): -1, ("Z", "R"): 0, ("F", "H"): -1,
+    ("B", "F"): -4, ("F", "L"): 1, ("X", "Q"): -1, ("B", "B"): 5
+}
+
+
+# http://www.embl-heidelberg.de/~vogt/matrices/blosum60.cmp
+blosum60 = {
+    ("W", "F"): 1, ("L", "R"): -2, ("S", "P"): -1, ("V", "T"): 0,
+    ("Q", "Q"): 5, ("N", "A"): -1, ("Z", "Y"): -2, ("W", "R"): -3,
+    ("Q", "A"): -1, ("S", "D"): 0, ("H", "H"): 7, ("S", "H"): -1,
+    ("H", "D"): -1, ("L", "N"): -3, ("W", "A"): -3, ("Y", "M"): -1,
+    ("G", "R"): -2, ("Y", "I"): -1, ("Y", "E"): -2, ("B", "Y"): -2,
+    ("Y", "A"): -2, ("V", "D"): -3, ("B", "S"): 0, ("Y", "Y"): 6,
+    ("G", "N"): 0, ("E", "C"): -3, ("Y", "Q"): -1, ("Z", "Z"): 3,
+    ("V", "A"): 0, ("C", "C"): 9, ("M", "R"): -1, ("V", "E"): -2,
+    ("T", "N"): 0, ("P", "P"): 7, ("V", "I"): 3, ("V", "S"): -2,
+    ("Z", "P"): -1, ("V", "M"): 1, ("T", "F"): -2, ("V", "Q"): -2,
+    ("K", "K"): 4, ("P", "D"): -1, ("I", "H"): -3, ("I", "D"): -3,
+    ("T", "R"): -1, ("P", "L"): -3, ("K", "G"): -1, ("M", "N"): -2,
+    ("P", "H"): -2, ("F", "Q"): -3, ("Z", "G"): -2, ("X", "L"): -1,
+    ("T", "M"): -1, ("Z", "C"): -3, ("X", "H"): -1, ("D", "R"): -1,
+    ("B", "W"): -4, ("X", "D"): -1, ("Z", "K"): 1, ("F", "A"): -2,
+    ("Z", "W"): -2, ("F", "E"): -3, ("D", "N"): 1, ("B", "K"): 0,
+    ("X", "X"): -1, ("F", "I"): 0, ("B", "G"): -1, ("X", "T"): 0,
+    ("F", "M"): 0, ("B", "C"): -3, ("Z", "I"): -3, ("Z", "V"): -2,
+    ("S", "S"): 4, ("L", "Q"): -2, ("W", "E"): -3, ("Q", "R"): 1,
+    ("N", "N"): 6, ("W", "M"): -1, ("Q", "C"): -3, ("W", "I"): -2,
+    ("S", "C"): -1, ("L", "A"): -1, ("S", "G"): 0, ("L", "E"): -3,
+    ("W", "Q"): -2, ("H", "G"): -2, ("S", "K"): 0, ("Q", "N"): 0,
+    ("N", "R"): 0, ("H", "C"): -3, ("Y", "N"): -2, ("G", "Q"): -2,
+    ("Y", "F"): 3, ("C", "A"): 0, ("V", "L"): 1, ("G", "E"): -2,
+    ("G", "A"): 0, ("K", "R"): 2, ("E", "D"): 2, ("Y", "R"): -2,
+    ("M", "Q"): 0, ("T", "I"): -1, ("C", "D"): -3, ("V", "F"): -1,
+    ("T", "A"): 0, ("T", "P"): -1, ("B", "P"): -2, ("T", "E"): -1,
+    ("V", "N"): -3, ("P", "G"): -2, ("M", "A"): -1, ("K", "H"): -1,
+    ("V", "R"): -2, ("P", "C"): -3, ("M", "E"): -2, ("K", "L"): -2,
+    ("V", "V"): 4, ("M", "I"): 1, ("T", "Q"): -1, ("I", "G"): -3,
+    ("P", "K"): -1, ("M", "M"): 5, ("K", "D"): -1, ("I", "C"): -1,
+    ("Z", "D"): 1, ("F", "R"): -3, ("X", "K"): -1, ("Q", "D"): 0,
+    ("X", "G"): -1, ("Z", "L"): -2, ("X", "C"): -2, ("Z", "H"): 0,
+    ("B", "L"): -3, ("B", "H"): 0, ("F", "F"): 6, ("X", "W"): -2,
+    ("B", "D"): 4, ("D", "A"): -2, ("S", "L"): -2, ("X", "S"): 0,
+    ("F", "N"): -3, ("S", "R"): -1, ("W", "D"): -4, ("V", "Y"): -1,
+    ("W", "L"): -2, ("H", "R"): 0, ("W", "H"): -2, ("H", "N"): 1,
+    ("W", "T"): -2, ("T", "T"): 4, ("S", "F"): -2, ("W", "P"): -4,
+    ("L", "D"): -3, ("B", "I"): -3, ("L", "H"): -3, ("S", "N"): 1,
+    ("B", "T"): 0, ("L", "L"): 4, ("Y", "K"): -2, ("E", "Q"): 2,
+    ("Y", "G"): -3, ("Z", "S"): 0, ("Y", "C"): -2, ("G", "D"): -1,
+    ("B", "V"): -3, ("E", "A"): -1, ("Y", "W"): 2, ("E", "E"): 5,
+    ("Y", "S"): -2, ("C", "N"): -2, ("V", "C"): -1, ("T", "H"): -2,
+    ("P", "R"): -2, ("V", "G"): -3, ("T", "L"): -1, ("V", "K"): -2,
+    ("K", "Q"): 1, ("R", "A"): -1, ("I", "R"): -3, ("T", "D"): -1,
+    ("P", "F"): -4, ("I", "N"): -3, ("K", "I"): -3, ("M", "D"): -3,
+    ("V", "W"): -3, ("W", "W"): 10, ("M", "H"): -1, ("P", "N"): -2,
+    ("K", "A"): -1, ("M", "L"): 2, ("K", "E"): 1, ("Z", "E"): 4,
+    ("X", "N"): -1, ("Z", "A"): -1, ("Z", "M"): -1, ("X", "F"): -1,
+    ("K", "C"): -3, ("B", "Q"): 0, ("X", "B"): -1, ("B", "M"): -3,
+    ("F", "C"): -2, ("Z", "Q"): 3, ("X", "Z"): -1, ("F", "G"): -3,
+    ("B", "E"): 1, ("X", "V"): -1, ("F", "K"): -3, ("B", "A"): -2,
+    ("X", "R"): -1, ("D", "D"): 6, ("W", "G"): -2, ("Z", "F"): -3,
+    ("S", "Q"): 0, ("W", "C"): -2, ("W", "K"): -3, ("H", "Q"): 1,
+    ("L", "C"): -1, ("W", "N"): -4, ("S", "A"): 1, ("L", "G"): -4,
+    ("W", "S"): -3, ("S", "E"): 0, ("H", "E"): 0, ("S", "I"): -2,
+    ("H", "A"): -2, ("S", "M"): -1, ("Y", "L"): -1, ("Y", "H"): 2,
+    ("Y", "D"): -3, ("E", "R"): 0, ("X", "P"): -2, ("G", "G"): 6,
+    ("G", "C"): -2, ("E", "N"): 0, ("Y", "T"): -2, ("Y", "P"): -3,
+    ("T", "K"): -1, ("A", "A"): 4, ("P", "Q"): -1, ("T", "C"): -1,
+    ("V", "H"): -3, ("T", "G"): -2, ("I", "Q"): -3, ("Z", "T"): -1,
+    ("C", "R"): -3, ("V", "P"): -2, ("P", "E"): -1, ("M", "C"): -1,
+    ("K", "N"): 0, ("I", "I"): 4, ("P", "A"): -1, ("M", "G"): -2,
+    ("T", "S"): 1, ("I", "E"): -3, ("P", "M"): -2, ("M", "K"): -1,
+    ("I", "A"): -1, ("P", "I"): -3, ("R", "R"): 5, ("X", "M"): -1,
+    ("L", "I"): 2, ("X", "I"): -1, ("Z", "B"): 1, ("X", "E"): -1,
+    ("Z", "N"): 0, ("X", "A"): 0, ("B", "R"): -1, ("B", "N"): 3,
+    ("F", "D"): -3, ("X", "Y"): -1, ("Z", "R"): 0, ("F", "H"): -1,
+    ("B", "F"): -3, ("F", "L"): 0, ("X", "Q"): -1, ("B", "B"): 4
+}
+
+
+# http://www.embl-heidelberg.de/~vogt/matrices/blosum62.cmp
+blosum62 = {
+    ("W", "F"): 1, ("L", "R"): -2, ("S", "P"): -1, ("V", "T"): 0,
+    ("Q", "Q"): 5, ("N", "A"): -2, ("Z", "Y"): -2, ("W", "R"): -3,
+    ("Q", "A"): -1, ("S", "D"): 0, ("H", "H"): 8, ("S", "H"): -1,
+    ("H", "D"): -1, ("L", "N"): -3, ("W", "A"): -3, ("Y", "M"): -1,
+    ("G", "R"): -2, ("Y", "I"): -1, ("Y", "E"): -2, ("B", "Y"): -3,
+    ("Y", "A"): -2, ("V", "D"): -3, ("B", "S"): 0, ("Y", "Y"): 7,
+    ("G", "N"): 0, ("E", "C"): -4, ("Y", "Q"): -1, ("Z", "Z"): 4,
+    ("V", "A"): 0, ("C", "C"): 9, ("M", "R"): -1, ("V", "E"): -2,
+    ("T", "N"): 0, ("P", "P"): 7, ("V", "I"): 3, ("V", "S"): -2,
+    ("Z", "P"): -1, ("V", "M"): 1, ("T", "F"): -2, ("V", "Q"): -2,
+    ("K", "K"): 5, ("P", "D"): -1, ("I", "H"): -3, ("I", "D"): -3,
+    ("T", "R"): -1, ("P", "L"): -3, ("K", "G"): -2, ("M", "N"): -2,
+    ("P", "H"): -2, ("F", "Q"): -3, ("Z", "G"): -2, ("X", "L"): -1,
+    ("T", "M"): -1, ("Z", "C"): -3, ("X", "H"): -1, ("D", "R"): -2,
+    ("B", "W"): -4, ("X", "D"): -1, ("Z", "K"): 1, ("F", "A"): -2,
+    ("Z", "W"): -3, ("F", "E"): -3, ("D", "N"): 1, ("B", "K"): 0,
+    ("X", "X"): -1, ("F", "I"): 0, ("B", "G"): -1, ("X", "T"): 0,
+    ("F", "M"): 0, ("B", "C"): -3, ("Z", "I"): -3, ("Z", "V"): -2,
+    ("S", "S"): 4, ("L", "Q"): -2, ("W", "E"): -3, ("Q", "R"): 1,
+    ("N", "N"): 6, ("W", "M"): -1, ("Q", "C"): -3, ("W", "I"): -3,
+    ("S", "C"): -1, ("L", "A"): -1, ("S", "G"): 0, ("L", "E"): -3,
+    ("W", "Q"): -2, ("H", "G"): -2, ("S", "K"): 0, ("Q", "N"): 0,
+    ("N", "R"): 0, ("H", "C"): -3, ("Y", "N"): -2, ("G", "Q"): -2,
+    ("Y", "F"): 3, ("C", "A"): 0, ("V", "L"): 1, ("G", "E"): -2,
+    ("G", "A"): 0, ("K", "R"): 2, ("E", "D"): 2, ("Y", "R"): -2,
+    ("M", "Q"): 0, ("T", "I"): -1, ("C", "D"): -3, ("V", "F"): -1,
+    ("T", "A"): 0, ("T", "P"): -1, ("B", "P"): -2, ("T", "E"): -1,
+    ("V", "N"): -3, ("P", "G"): -2, ("M", "A"): -1, ("K", "H"): -1,
+    ("V", "R"): -3, ("P", "C"): -3, ("M", "E"): -2, ("K", "L"): -2,
+    ("V", "V"): 4, ("M", "I"): 1, ("T", "Q"): -1, ("I", "G"): -4,
+    ("P", "K"): -1, ("M", "M"): 5, ("K", "D"): -1, ("I", "C"): -1,
+    ("Z", "D"): 1, ("F", "R"): -3, ("X", "K"): -1, ("Q", "D"): 0,
+    ("X", "G"): -1, ("Z", "L"): -3, ("X", "C"): -2, ("Z", "H"): 0,
+    ("B", "L"): -4, ("B", "H"): 0, ("F", "F"): 6, ("X", "W"): -2,
+    ("B", "D"): 4, ("D", "A"): -2, ("S", "L"): -2, ("X", "S"): 0,
+    ("F", "N"): -3, ("S", "R"): -1, ("W", "D"): -4, ("V", "Y"): -1,
+    ("W", "L"): -2, ("H", "R"): 0, ("W", "H"): -2, ("H", "N"): 1,
+    ("W", "T"): -2, ("T", "T"): 5, ("S", "F"): -2, ("W", "P"): -4,
+    ("L", "D"): -4, ("B", "I"): -3, ("L", "H"): -3, ("S", "N"): 1,
+    ("B", "T"): -1, ("L", "L"): 4, ("Y", "K"): -2, ("E", "Q"): 2,
+    ("Y", "G"): -3, ("Z", "S"): 0, ("Y", "C"): -2, ("G", "D"): -1,
+    ("B", "V"): -3, ("E", "A"): -1, ("Y", "W"): 2, ("E", "E"): 5,
+    ("Y", "S"): -2, ("C", "N"): -3, ("V", "C"): -1, ("T", "H"): -2,
+    ("P", "R"): -2, ("V", "G"): -3, ("T", "L"): -1, ("V", "K"): -2,
+    ("K", "Q"): 1, ("R", "A"): -1, ("I", "R"): -3, ("T", "D"): -1,
+    ("P", "F"): -4, ("I", "N"): -3, ("K", "I"): -3, ("M", "D"): -3,
+    ("V", "W"): -3, ("W", "W"): 11, ("M", "H"): -2, ("P", "N"): -2,
+    ("K", "A"): -1, ("M", "L"): 2, ("K", "E"): 1, ("Z", "E"): 4,
+    ("X", "N"): -1, ("Z", "A"): -1, ("Z", "M"): -1, ("X", "F"): -1,
+    ("K", "C"): -3, ("B", "Q"): 0, ("X", "B"): -1, ("B", "M"): -3,
+    ("F", "C"): -2, ("Z", "Q"): 3, ("X", "Z"): -1, ("F", "G"): -3,
+    ("B", "E"): 1, ("X", "V"): -1, ("F", "K"): -3, ("B", "A"): -2,
+    ("X", "R"): -1, ("D", "D"): 6, ("W", "G"): -2, ("Z", "F"): -3,
+    ("S", "Q"): 0, ("W", "C"): -2, ("W", "K"): -3, ("H", "Q"): 0,
+    ("L", "C"): -1, ("W", "N"): -4, ("S", "A"): 1, ("L", "G"): -4,
+    ("W", "S"): -3, ("S", "E"): 0, ("H", "E"): 0, ("S", "I"): -2,
+    ("H", "A"): -2, ("S", "M"): -1, ("Y", "L"): -1, ("Y", "H"): 2,
+    ("Y", "D"): -3, ("E", "R"): 0, ("X", "P"): -2, ("G", "G"): 6,
+    ("G", "C"): -3, ("E", "N"): 0, ("Y", "T"): -2, ("Y", "P"): -3,
+    ("T", "K"): -1, ("A", "A"): 4, ("P", "Q"): -1, ("T", "C"): -1,
+    ("V", "H"): -3, ("T", "G"): -2, ("I", "Q"): -3, ("Z", "T"): -1,
+    ("C", "R"): -3, ("V", "P"): -2, ("P", "E"): -1, ("M", "C"): -1,
+    ("K", "N"): 0, ("I", "I"): 4, ("P", "A"): -1, ("M", "G"): -3,
+    ("T", "S"): 1, ("I", "E"): -3, ("P", "M"): -2, ("M", "K"): -1,
+    ("I", "A"): -1, ("P", "I"): -3, ("R", "R"): 5, ("X", "M"): -1,
+    ("L", "I"): 2, ("X", "I"): -1, ("Z", "B"): 1, ("X", "E"): -1,
+    ("Z", "N"): 0, ("X", "A"): 0, ("B", "R"): -1, ("B", "N"): 3,
+    ("F", "D"): -3, ("X", "Y"): -1, ("Z", "R"): 0, ("F", "H"): -1,
+    ("B", "F"): -3, ("F", "L"): 0, ("X", "Q"): -1, ("B", "B"): 4
+}
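+
+
+# A short usage sketch: score an ungapped, equal-length alignment with
+# BLOSUM62 via the pair_score helper defined earlier. Illustrative only;
+# gap penalties and actual alignment are outside the scope of this module:
+def ungapped_score(matrix, seq1, seq2):
+    """Sum pairwise substitution scores over two aligned, equal-length sequences."""
+    assert len(seq1) == len(seq2)
+    return sum(pair_score(matrix, a, b) for a, b in zip(seq1, seq2))
+
+
+# e.g. ungapped_score(blosum62, "HEAGAWGHE", "HEAGAWGHE") scores a peptide
+# against itself.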
+
+
+# http://www.embl-heidelberg.de/~vogt/matrices/blosum65.cmp
+blosum65 = {
+    ("W", "F"): 1, ("L", "R"): -2, ("S", "P"): -1, ("V", "T"): 0,
+    ("Q", "Q"): 6, ("N", "A"): -2, ("Z", "Y"): -2, ("W", "R"): -3,
+    ("Q", "A"): -1, ("S", "D"): 0, ("H", "H"): 8, ("S", "H"): -1,
+    ("H", "D"): -1, ("L", "N"): -4, ("W", "A"): -3, ("Y", "M"): -1,
+    ("G", "R"): -2, ("Y", "I"): -1, ("Y", "E"): -2, ("B", "Y"): -3,
+    ("Y", "A"): -2, ("V", "D"): -3, ("B", "S"): 0, ("Y", "Y"): 7,
+    ("G", "N"): -1, ("E", "C"): -4, ("Y", "Q"): -2, ("Z", "Z"): 4,
+    ("V", "A"): 0, ("C", "C"): 9, ("M", "R"): -2, ("V", "E"): -3,
+    ("T", "N"): 0, ("P", "P"): 8, ("V", "I"): 3, ("V", "S"): -2,
+    ("Z", "P"): -1, ("V", "M"): 1, ("T", "F"): -2, ("V", "Q"): -2,
+    ("K", "K"): 5, ("P", "D"): -2, ("I", "H"): -3, ("I", "D"): -3,
+    ("T", "R"): -1, ("P", "L"): -3, ("K", "G"): -2, ("M", "N"): -2,
+    ("P", "H"): -2, ("F", "Q"): -3, ("Z", "G"): -2, ("X", "L"): -1,
+    ("T", "M"): -1, ("Z", "C"): -4, ("X", "H"): -1, ("D", "R"): -2,
+    ("B", "W"): -4, ("X", "D"): -1, ("Z", "K"): 1, ("F", "A"): -2,
+    ("Z", "W"): -3, ("F", "E"): -3, ("D", "N"): 1, ("B", "K"): 0,
+    ("X", "X"): -1, ("F", "I"): 0, ("B", "G"): -1, ("X", "T"): -1,
+    ("F", "M"): 0, ("B", "C"): -3, ("Z", "I"): -3, ("Z", "V"): -2,
+    ("S", "S"): 4, ("L", "Q"): -2, ("W", "E"): -3, ("Q", "R"): 1,
+    ("N", "N"): 6, ("W", "M"): -2, ("Q", "C"): -3, ("W", "I"): -2,
+    ("S", "C"): -1, ("L", "A"): -2, ("S", "G"): 0, ("L", "E"): -3,
+    ("W", "Q"): -2, ("H", "G"): -2, ("S", "K"): 0, ("Q", "N"): 0,
+    ("N", "R"): 0, ("H", "C"): -3, ("Y", "N"): -2, ("G", "Q"): -2,
+    ("Y", "F"): 3, ("C", "A"): 0, ("V", "L"): 1, ("G", "E"): -2,
+    ("G", "A"): 0, ("K", "R"): 2, ("E", "D"): 2, ("Y", "R"): -2,
+    ("M", "Q"): 0, ("T", "I"): -1, ("C", "D"): -4, ("V", "F"): -1,
+    ("T", "A"): 0, ("T", "P"): -1, ("B", "P"): -2, ("T", "E"): -1,
+    ("V", "N"): -3, ("P", "G"): -2, ("M", "A"): -1, ("K", "H"): -1,
+    ("V", "R"): -3, ("P", "C"): -3, ("M", "E"): -2, ("K", "L"): -3,
+    ("V", "V"): 4, ("M", "I"): 1, ("T", "Q"): -1, ("I", "G"): -4,
+    ("P", "K"): -1, ("M", "M"): 6, ("K", "D"): -1, ("I", "C"): -1,
+    ("Z", "D"): 1, ("F", "R"): -3, ("X", "K"): -1, ("Q", "D"): 0,
+    ("X", "G"): -2, ("Z", "L"): -3, ("X", "C"): -2, ("Z", "H"): 0,
+    ("B", "L"): -4, ("B", "H"): 0, ("F", "F"): 6, ("X", "W"): -2,
+    ("B", "D"): 4, ("D", "A"): -2, ("S", "L"): -3, ("X", "S"): -1,
+    ("F", "N"): -3, ("S", "R"): -1, ("W", "D"): -5, ("V", "Y"): -1,
+    ("W", "L"): -2, ("H", "R"): 0, ("W", "H"): -2, ("H", "N"): 1,
+    ("W", "T"): -3, ("T", "T"): 5, ("S", "F"): -2, ("W", "P"): -4,
+    ("L", "D"): -4, ("B", "I"): -3, ("L", "H"): -3, ("S", "N"): 1,
+    ("B", "T"): -1, ("L", "L"): 4, ("Y", "K"): -2, ("E", "Q"): 2,
+    ("Y", "G"): -3, ("Z", "S"): 0, ("Y", "C"): -2, ("G", "D"): -1,
+    ("B", "V"): -3, ("E", "A"): -1, ("Y", "W"): 2, ("E", "E"): 5,
+    ("Y", "S"): -2, ("C", "N"): -3, ("V", "C"): -1, ("T", "H"): -2,
+    ("P", "R"): -2, ("V", "G"): -3, ("T", "L"): -1, ("V", "K"): -2,
+    ("K", "Q"): 1, ("R", "A"): -1, ("I", "R"): -3, ("T", "D"): -1,
+    ("P", "F"): -4, ("I", "N"): -3, ("K", "I"): -3, ("M", "D"): -3,
+    ("V", "W"): -3, ("W", "W"): 10, ("M", "H"): -2, ("P", "N"): -2,
+    ("K", "A"): -1, ("M", "L"): 2, ("K", "E"): 1, ("Z", "E"): 4,
+    ("X", "N"): -1, ("Z", "A"): -1, ("Z", "M"): -2, ("X", "F"): -2,
+    ("K", "C"): -3, ("B", "Q"): 0, ("X", "B"): -1, ("B", "M"): -3,
+    ("F", "C"): -2, ("Z", "Q"): 3, ("X", "Z"): -1, ("F", "G"): -3,
+    ("B", "E"): 1, ("X", "V"): -1, ("F", "K"): -3, ("B", "A"): -2,
+    ("X", "R"): -1, ("D", "D"): 6, ("W", "G"): -3, ("Z", "F"): -3,
+    ("S", "Q"): 0, ("W", "C"): -2, ("W", "K"): -3, ("H", "Q"): 1,
+    ("L", "C"): -1, ("W", "N"): -4, ("S", "A"): 1, ("L", "G"): -4,
+    ("W", "S"): -3, ("S", "E"): 0, ("H", "E"): 0, ("S", "I"): -2,
+    ("H", "A"): -2, ("S", "M"): -2, ("Y", "L"): -1, ("Y", "H"): 2,
+    ("Y", "D"): -3, ("E", "R"): 0, ("X", "P"): -2, ("G", "G"): 6,
+    ("G", "C"): -3, ("E", "N"): 0, ("Y", "T"): -2, ("Y", "P"): -3,
+    ("T", "K"): -1, ("A", "A"): 4, ("P", "Q"): -1, ("T", "C"): -1,
+    ("V", "H"): -3, ("T", "G"): -2, ("I", "Q"): -3, ("Z", "T"): -1,
+    ("C", "R"): -4, ("V", "P"): -2, ("P", "E"): -1, ("M", "C"): -2,
+    ("K", "N"): 0, ("I", "I"): 4, ("P", "A"): -1, ("M", "G"): -3,
+    ("T", "S"): 1, ("I", "E"): -3, ("P", "M"): -3, ("M", "K"): -2,
+    ("I", "A"): -1, ("P", "I"): -3, ("R", "R"): 6, ("X", "M"): -1,
+    ("L", "I"): 2, ("X", "I"): -1, ("Z", "B"): 1, ("X", "E"): -1,
+    ("Z", "N"): 0, ("X", "A"): -1, ("B", "R"): -1, ("B", "N"): 3,
+    ("F", "D"): -4, ("X", "Y"): -1, ("Z", "R"): 0, ("F", "H"): -1,
+    ("B", "F"): -3, ("F", "L"): 0, ("X", "Q"): -1, ("B", "B"): 4
+}
+
+
+# http://www.embl-heidelberg.de/~vogt/matrices/blosum70.cmp
+blosum70 = {
+    ("W", "F"): 1, ("L", "R"): -3, ("S", "P"): -1, ("V", "T"): 0,
+    ("Q", "Q"): 6, ("N", "A"): -2, ("Z", "Y"): -2, ("W", "R"): -3,
+    ("Q", "A"): -1, ("S", "D"): 0, ("H", "H"): 8, ("S", "H"): -1,
+    ("H", "D"): -1, ("L", "N"): -4, ("W", "A"): -3, ("Y", "M"): -1,
+    ("G", "R"): -3, ("Y", "I"): -1, ("Y", "E"): -3, ("B", "Y"): -3,
+    ("Y", "A"): -2, ("V", "D"): -4, ("B", "S"): 0, ("Y", "Y"): 7,
+    ("G", "N"): -1, ("E", "C"): -4, ("Y", "Q"): -2, ("Z", "Z"): 4,
+    ("V", "A"): 0, ("C", "C"): 9, ("M", "R"): -2, ("V", "E"): -3,
+    ("T", "N"): 0, ("P", "P"): 8, ("V", "I"): 3, ("V", "S"): -2,
+    ("Z", "P"): -1, ("V", "M"): 1, ("T", "F"): -2, ("V", "Q"): -2,
+    ("K", "K"): 5, ("P", "D"): -2, ("I", "H"): -4, ("I", "D"): -4,
+    ("T", "R"): -1, ("P", "L"): -3, ("K", "G"): -2, ("M", "N"): -2,
+    ("P", "H"): -2, ("F", "Q"): -3, ("Z", "G"): -2, ("X", "L"): -1,
+    ("T", "M"): -1, ("Z", "C"): -4, ("X", "H"): -1, ("D", "R"): -2,
+    ("B", "W"): -4, ("X", "D"): -2, ("Z", "K"): 1, ("F", "A"): -2,
+    ("Z", "W"): -3, ("F", "E"): -4, ("D", "N"): 1, ("B", "K"): -1,
+    ("X", "X"): -1, ("F", "I"): 0, ("B", "G"): -1, ("X", "T"): -1,
+    ("F", "M"): 0, ("B", "C"): -4, ("Z", "I"): -3, ("Z", "V"): -3,
+    ("S", "S"): 4, ("L", "Q"): -2, ("W", "E"): -4, ("Q", "R"): 1,
+    ("N", "N"): 6, ("W", "M"): -2, ("Q", "C"): -3, ("W", "I"): -3,
+    ("S", "C"): -1, ("L", "A"): -2, ("S", "G"): -1, ("L", "E"): -3,
+    ("W", "Q"): -2, ("H", "G"): -2, ("S", "K"): 0, ("Q", "N"): 0,
+    ("N", "R"): -1, ("H", "C"): -4, ("Y", "N"): -2, ("G", "Q"): -2,
+    ("Y", "F"): 3, ("C", "A"): -1, ("V", "L"): 1, ("G", "E"): -2,
+    ("G", "A"): 0, ("K", "R"): 2, ("E", "D"): 1, ("Y", "R"): -2,
+    ("M", "Q"): 0, ("T", "I"): -1, ("C", "D"): -4, ("V", "F"): -1,
+    ("T", "A"): 0, ("T", "P"): -1, ("B", "P"): -2, ("T", "E"): -1,
+    ("V", "N"): -3, ("P", "G"): -3, ("M", "A"): -1, ("K", "H"): -1,
+    ("V", "R"): -3, ("P", "C"): -3, ("M", "E"): -2, ("K", "L"): -3,
+    ("V", "V"): 4, ("M", "I"): 1, ("T", "Q"): -1, ("I", "G"): -4,
+    ("P", "K"): -1, ("M", "M"): 6, ("K", "D"): -1, ("I", "C"): -1,
+    ("Z", "D"): 1, ("F", "R"): -3, ("X", "K"): -1, ("Q", "D"): -1,
+    ("X", "G"): -2, ("Z", "L"): -3, ("X", "C"): -2, ("Z", "H"): 0,
+    ("B", "L"): -4, ("B", "H"): -1, ("F", "F"): 6, ("X", "W"): -3,
+    ("B", "D"): 4, ("D", "A"): -2, ("S", "L"): -3, ("X", "S"): -1,
+    ("F", "N"): -3, ("S", "R"): -1, ("W", "D"): -5, ("V", "Y"): -2,
+    ("W", "L"): -2, ("H", "R"): 0, ("W", "H"): -2, ("H", "N"): 0,
+    ("W", "T"): -3, ("T", "T"): 5, ("S", "F"): -3, ("W", "P"): -4,
+    ("L", "D"): -4, ("B", "I"): -4, ("L", "H"): -3, ("S", "N"): 0,
+    ("B", "T"): -1, ("L", "L"): 4, ("Y", "K"): -2, ("E", "Q"): 2,
+    ("Y", "G"): -4, ("Z", "S"): 0, ("Y", "C"): -3, ("G", "D"): -2,
+    ("B", "V"): -3, ("E", "A"): -1, ("Y", "W"): 2, ("E", "E"): 5,
+    ("Y", "S"): -2, ("C", "N"): -3, ("V", "C"): -1, ("T", "H"): -2,
+    ("P", "R"): -2, ("V", "G"): -4, ("T", "L"): -2, ("V", "K"): -3,
+    ("K", "Q"): 1, ("R", "A"): -2, ("I", "R"): -3, ("T", "D"): -1,
+    ("P", "F"): -4, ("I", "N"): -4, ("K", "I"): -3, ("M", "D"): -3,
+    ("V", "W"): -3, ("W", "W"): 11, ("M", "H"): -2, ("P", "N"): -2,
+    ("K", "A"): -1, ("M", "L"): 2, ("K", "E"): 1, ("Z", "E"): 4,
+    ("X", "N"): -1, ("Z", "A"): -1, ("Z", "M"): -2, ("X", "F"): -2,
+    ("K", "C"): -4, ("B", "Q"): 0, ("X", "B"): -1, ("B", "M"): -3,
+    ("F", "C"): -2, ("Z", "Q"): 3, ("X", "Z"): -1, ("F", "G"): -4,
+    ("B", "E"): 1, ("X", "V"): -1, ("F", "K"): -3, ("B", "A"): -2,
+    ("X", "R"): -1, ("D", "D"): 6, ("W", "G"): -3, ("Z", "F"): -4,
+    ("S", "Q"): 0, ("W", "C"): -3, ("W", "K"): -3, ("H", "Q"): 1,
+    ("L", "C"): -2, ("W", "N"): -4, ("S", "A"): 1, ("L", "G"): -4,
+    ("W", "S"): -3, ("S", "E"): 0, ("H", "E"): 0, ("S", "I"): -3,
+    ("H", "A"): -2, ("S", "M"): -2, ("Y", "L"): -1, ("Y", "H"): 2,
+    ("Y", "D"): -4, ("E", "R"): 0, ("X", "P"): -2, ("G", "G"): 6,
+    ("G", "C"): -3, ("E", "N"): 0, ("Y", "T"): -2, ("Y", "P"): -3,
+    ("T", "K"): -1, ("A", "A"): 4, ("P", "Q"): -2, ("T", "C"): -1,
+    ("V", "H"): -3, ("T", "G"): -2, ("I", "Q"): -3, ("Z", "T"): -1,
+    ("C", "R"): -4, ("V", "P"): -3, ("P", "E"): -1, ("M", "C"): -2,
+    ("K", "N"): 0, ("I", "I"): 4, ("P", "A"): -1, ("M", "G"): -3,
+    ("T", "S"): 1, ("I", "E"): -4, ("P", "M"): -3, ("M", "K"): -2,
+    ("I", "A"): -2, ("P", "I"): -3, ("R", "R"): 6, ("X", "M"): -1,
+    ("L", "I"): 2, ("X", "I"): -1, ("Z", "B"): 0, ("X", "E"): -1,
+    ("Z", "N"): 0, ("X", "A"): -1, ("B", "R"): -1, ("B", "N"): 3,
+    ("F", "D"): -4, ("X", "Y"): -2, ("Z", "R"): 0, ("F", "H"): -1,
+    ("B", "F"): -4, ("F", "L"): 0, ("X", "Q"): -1, ("B", "B"): 4
+}
+
+
+# http://www.embl-heidelberg.de/~vogt/matrices/blosum75.cmp
+blosum75 = {
+    ("W", "F"): 1, ("L", "R"): -3, ("S", "P"): -1, ("V", "T"): 0,
+    ("Q", "Q"): 6, ("N", "A"): -2, ("Z", "Y"): -3, ("W", "R"): -3,
+    ("Q", "A"): -1, ("S", "D"): -1, ("H", "H"): 8, ("S", "H"): -1,
+    ("H", "D"): -1, ("L", "N"): -4, ("W", "A"): -3, ("Y", "M"): -2,
+    ("G", "R"): -3, ("Y", "I"): -2, ("Y", "E"): -3, ("B", "Y"): -3,
+    ("Y", "A"): -2, ("V", "D"): -4, ("B", "S"): 0, ("Y", "Y"): 7,
+    ("G", "N"): -1, ("E", "C"): -5, ("Y", "Q"): -2, ("Z", "Z"): 4,
+    ("V", "A"): 0, ("C", "C"): 9, ("M", "R"): -2, ("V", "E"): -3,
+    ("T", "N"): 0, ("P", "P"): 8, ("V", "I"): 3, ("V", "S"): -2,
+    ("Z", "P"): -2, ("V", "M"): 1, ("T", "F"): -2, ("V", "Q"): -2,
+    ("K", "K"): 5, ("P", "D"): -2, ("I", "H"): -4, ("I", "D"): -4,
+    ("T", "R"): -1, ("P", "L"): -3, ("K", "G"): -2, ("M", "N"): -3,
+    ("P", "H"): -2, ("F", "Q"): -4, ("Z", "G"): -2, ("X", "L"): -1,
+    ("T", "M"): -1, ("Z", "C"): -4, ("X", "H"): -1, ("D", "R"): -2,
+    ("B", "W"): -5, ("X", "D"): -2, ("Z", "K"): 1, ("F", "A"): -3,
+    ("Z", "W"): -3, ("F", "E"): -4, ("D", "N"): 1, ("B", "K"): -1,
+    ("X", "X"): -1, ("F", "I"): 0, ("B", "G"): -1, ("X", "T"): -1,
+    ("F", "M"): 0, ("B", "C"): -4, ("Z", "I"): -4, ("Z", "V"): -3,
+    ("S", "S"): 5, ("L", "Q"): -3, ("W", "E"): -4, ("Q", "R"): 1,
+    ("N", "N"): 6, ("W", "M"): -2, ("Q", "C"): -3, ("W", "I"): -3,
+    ("S", "C"): -1, ("L", "A"): -2, ("S", "G"): -1, ("L", "E"): -4,
+    ("W", "Q"): -2, ("H", "G"): -2, ("S", "K"): 0, ("Q", "N"): 0,
+    ("N", "R"): -1, ("H", "C"): -4, ("Y", "N"): -3, ("G", "Q"): -2,
+    ("Y", "F"): 3, ("C", "A"): -1, ("V", "L"): 1, ("G", "E"): -3,
+    ("G", "A"): 0, ("K", "R"): 2, ("E", "D"): 1, ("Y", "R"): -2,
+    ("M", "Q"): 0, ("T", "I"): -1, ("C", "D"): -4, ("V", "F"): -1,
+    ("T", "A"): 0, ("T", "P"): -1, ("B", "P"): -2, ("T", "E"): -1,
+    ("V", "N"): -3, ("P", "G"): -3, ("M", "A"): -1, ("K", "H"): -1,
+    ("V", "R"): -3, ("P", "C"): -4, ("M", "E"): -2, ("K", "L"): -3,
+    ("V", "V"): 4, ("M", "I"): 1, ("T", "Q"): -1, ("I", "G"): -5,
+    ("P", "K"): -1, ("M", "M"): 6, ("K", "D"): -1, ("I", "C"): -1,
+    ("Z", "D"): 1, ("F", "R"): -3, ("X", "K"): -1, ("Q", "D"): -1,
+    ("X", "G"): -2, ("Z", "L"): -3, ("X", "C"): -2, ("Z", "H"): 0,
+    ("B", "L"): -4, ("B", "H"): -1, ("F", "F"): 6, ("X", "W"): -3,
+    ("B", "D"): 4, ("D", "A"): -2, ("S", "L"): -3, ("X", "S"): -1,
+    ("F", "N"): -4, ("S", "R"): -1, ("W", "D"): -5, ("V", "Y"): -2,
+    ("W", "L"): -2, ("H", "R"): 0, ("W", "H"): -2, ("H", "N"): 0,
+    ("W", "T"): -3, ("T", "T"): 5, ("S", "F"): -3, ("W", "P"): -5,
+    ("L", "D"): -4, ("B", "I"): -4, ("L", "H"): -3, ("S", "N"): 0,
+    ("B", "T"): -1, ("L", "L"): 4, ("Y", "K"): -2, ("E", "Q"): 2,
+    ("Y", "G"): -4, ("Z", "S"): 0, ("Y", "C"): -3, ("G", "D"): -2,
+    ("B", "V"): -4, ("E", "A"): -1, ("Y", "W"): 2, ("E", "E"): 5,
+    ("Y", "S"): -2, ("C", "N"): -3, ("V", "C"): -1, ("T", "H"): -2,
+    ("P", "R"): -2, ("V", "G"): -4, ("T", "L"): -2, ("V", "K"): -3,
+    ("K", "Q"): 1, ("R", "A"): -2, ("I", "R"): -3, ("T", "D"): -1,
+    ("P", "F"): -4, ("I", "N"): -4, ("K", "I"): -3, ("M", "D"): -4,
+    ("V", "W"): -3, ("W", "W"): 11, ("M", "H"): -2, ("P", "N"): -3,
+    ("K", "A"): -1, ("M", "L"): 2, ("K", "E"): 1, ("Z", "E"): 4,
+    ("X", "N"): -1, ("Z", "A"): -1, ("Z", "M"): -2, ("X", "F"): -2,
+    ("K", "C"): -4, ("B", "Q"): 0, ("X", "B"): -2, ("B", "M"): -3,
+    ("F", "C"): -2, ("Z", "Q"): 3, ("X", "Z"): -1, ("F", "G"): -4,
+    ("B", "E"): 1, ("X", "V"): -1, ("F", "K"): -4, ("B", "A"): -2,
+    ("X", "R"): -1, ("D", "D"): 6, ("W", "G"): -3, ("Z", "F"): -4,
+    ("S", "Q"): 0, ("W", "C"): -3, ("W", "K"): -4, ("H", "Q"): 1,
+    ("L", "C"): -2, ("W", "N"): -4, ("S", "A"): 1, ("L", "G"): -4,
+    ("W", "S"): -3, ("S", "E"): 0, ("H", "E"): 0, ("S", "I"): -3,
+    ("H", "A"): -2, ("S", "M"): -2, ("Y", "L"): -1, ("Y", "H"): 2,
+    ("Y", "D"): -4, ("E", "R"): 0, ("X", "P"): -2, ("G", "G"): 6,
+    ("G", "C"): -3, ("E", "N"): -1, ("Y", "T"): -2, ("Y", "P"): -4,
+    ("T", "K"): -1, ("A", "A"): 4, ("P", "Q"): -2, ("T", "C"): -1,
+    ("V", "H"): -4, ("T", "G"): -2, ("I", "Q"): -3, ("Z", "T"): -1,
+    ("C", "R"): -4, ("V", "P"): -3, ("P", "E"): -1, ("M", "C"): -2,
+    ("K", "N"): 0, ("I", "I"): 4, ("P", "A"): -1, ("M", "G"): -3,
+    ("T", "S"): 1, ("I", "E"): -4, ("P", "M"): -3, ("M", "K"): -2,
+    ("I", "A"): -2, ("P", "I"): -3, ("R", "R"): 6, ("X", "M"): -1,
+    ("L", "I"): 1, ("X", "I"): -2, ("Z", "B"): 0, ("X", "E"): -1,
+    ("Z", "N"): 0, ("X", "A"): -1, ("B", "R"): -1, ("B", "N"): 3,
+    ("F", "D"): -4, ("X", "Y"): -2, ("Z", "R"): 0, ("F", "H"): -2,
+    ("B", "F"): -4, ("F", "L"): 0, ("X", "Q"): -1, ("B", "B"): 4
+}
+
+
+# http://www.embl-heidelberg.de/~vogt/matrices/blosum80.cmp
+blosum80 = {
+    ("W", "F"): 0, ("L", "R"): -3, ("S", "P"): -1, ("V", "T"): 0,
+    ("Q", "Q"): 6, ("N", "A"): -2, ("Z", "Y"): -3, ("W", "R"): -4,
+    ("Q", "A"): -1, ("S", "D"): -1, ("H", "H"): 8, ("S", "H"): -1,
+    ("H", "D"): -2, ("L", "N"): -4, ("W", "A"): -3, ("Y", "M"): -2,
+    ("G", "R"): -3, ("Y", "I"): -2, ("Y", "E"): -3, ("B", "Y"): -3,
+    ("Y", "A"): -2, ("V", "D"): -4, ("B", "S"): 0, ("Y", "Y"): 7,
+    ("G", "N"): -1, ("E", "C"): -5, ("Y", "Q"): -2, ("Z", "Z"): 4,
+    ("V", "A"): 0, ("C", "C"): 9, ("M", "R"): -2, ("V", "E"): -3,
+    ("T", "N"): 0, ("P", "P"): 8, ("V", "I"): 3, ("V", "S"): -2,
+    ("Z", "P"): -2, ("V", "M"): 1, ("T", "F"): -2, ("V", "Q"): -3,
+    ("K", "K"): 5, ("P", "D"): -2, ("I", "H"): -4, ("I", "D"): -4,
+    ("T", "R"): -1, ("P", "L"): -3, ("K", "G"): -2, ("M", "N"): -3,
+    ("P", "H"): -3, ("F", "Q"): -4, ("Z", "G"): -3, ("X", "L"): -2,
+    ("T", "M"): -1, ("Z", "C"): -4, ("X", "H"): -2, ("D", "R"): -2,
+    ("B", "W"): -5, ("X", "D"): -2, ("Z", "K"): 1, ("F", "A"): -3,
+    ("Z", "W"): -4, ("F", "E"): -4, ("D", "N"): 1, ("B", "K"): -1,
+    ("X", "X"): -1, ("F", "I"): -1, ("B", "G"): -1, ("X", "T"): -1,
+    ("F", "M"): 0, ("B", "C"): -4, ("Z", "I"): -4, ("Z", "V"): -3,
+    ("S", "S"): 5, ("L", "Q"): -3, ("W", "E"): -4, ("Q", "R"): 1,
+    ("N", "N"): 6, ("W", "M"): -2, ("Q", "C"): -4, ("W", "I"): -3,
+    ("S", "C"): -2, ("L", "A"): -2, ("S", "G"): -1, ("L", "E"): -4,
+    ("W", "Q"): -3, ("H", "G"): -3, ("S", "K"): -1, ("Q", "N"): 0,
+    ("N", "R"): -1, ("H", "C"): -4, ("Y", "N"): -3, ("G", "Q"): -2,
+    ("Y", "F"): 3, ("C", "A"): -1, ("V", "L"): 1, ("G", "E"): -3,
+    ("G", "A"): 0, ("K", "R"): 2, ("E", "D"): 1, ("Y", "R"): -3,
+    ("M", "Q"): 0, ("T", "I"): -1, ("C", "D"): -4, ("V", "F"): -1,
+    ("T", "A"): 0, ("T", "P"): -2, ("B", "P"): -2, ("T", "E"): -1,
+    ("V", "N"): -4, ("P", "G"): -3, ("M", "A"): -1, ("K", "H"): -1,
+    ("V", "R"): -3, ("P", "C"): -4, ("M", "E"): -2, ("K", "L"): -3,
+    ("V", "V"): 4, ("M", "I"): 1, ("T", "Q"): -1, ("I", "G"): -5,
+    ("P", "K"): -1, ("M", "M"): 6, ("K", "D"): -1, ("I", "C"): -2,
+    ("Z", "D"): 1, ("F", "R"): -4, ("X", "K"): -1, ("Q", "D"): -1,
+    ("X", "G"): -2, ("Z", "L"): -3, ("X", "C"): -3, ("Z", "H"): 0,
+    ("B", "L"): -4, ("B", "H"): -1, ("F", "F"): 6, ("X", "W"): -3,
+    ("B", "D"): 4, ("D", "A"): -2, ("S", "L"): -3, ("X", "S"): -1,
+    ("F", "N"): -4, ("S", "R"): -1, ("W", "D"): -6, ("V", "Y"): -2,
+    ("W", "L"): -2, ("H", "R"): 0, ("W", "H"): -3, ("H", "N"): 0,
+    ("W", "T"): -4, ("T", "T"): 5, ("S", "F"): -3, ("W", "P"): -5,
+    ("L", "D"): -5, ("B", "I"): -4, ("L", "H"): -3, ("S", "N"): 0,
+    ("B", "T"): -1, ("L", "L"): 4, ("Y", "K"): -3, ("E", "Q"): 2,
+    ("Y", "G"): -4, ("Z", "S"): 0, ("Y", "C"): -3, ("G", "D"): -2,
+    ("B", "V"): -4, ("E", "A"): -1, ("Y", "W"): 2, ("E", "E"): 6,
+    ("Y", "S"): -2, ("C", "N"): -3, ("V", "C"): -1, ("T", "H"): -2,
+    ("P", "R"): -2, ("V", "G"): -4, ("T", "L"): -2, ("V", "K"): -3,
+    ("K", "Q"): 1, ("R", "A"): -2, ("I", "R"): -3, ("T", "D"): -1,
+    ("P", "F"): -4, ("I", "N"): -4, ("K", "I"): -3, ("M", "D"): -4,
+    ("V", "W"): -3, ("W", "W"): 11, ("M", "H"): -2, ("P", "N"): -3,
+    ("K", "A"): -1, ("M", "L"): 2, ("K", "E"): 1, ("Z", "E"): 4,
+    ("X", "N"): -1, ("Z", "A"): -1, ("Z", "M"): -2, ("X", "F"): -2,
+    ("K", "C"): -4, ("B", "Q"): 0, ("X", "B"): -2, ("B", "M"): -3,
+    ("F", "C"): -3, ("Z", "Q"): 3, ("X", "Z"): -1, ("F", "G"): -4,
+    ("B", "E"): 1, ("X", "V"): -1, ("F", "K"): -4, ("B", "A"): -2,
+    ("X", "R"): -1, ("D", "D"): 6, ("W", "G"): -4, ("Z", "F"): -4,
+    ("S", "Q"): 0, ("W", "C"): -3, ("W", "K"): -4, ("H", "Q"): 1,
+    ("L", "C"): -2, ("W", "N"): -4, ("S", "A"): 1, ("L", "G"): -4,
+    ("W", "S"): -4, ("S", "E"): 0, ("H", "E"): 0, ("S", "I"): -3,
+    ("H", "A"): -2, ("S", "M"): -2, ("Y", "L"): -2, ("Y", "H"): 2,
+    ("Y", "D"): -4, ("E", "R"): -1, ("X", "P"): -2, ("G", "G"): 6,
+    ("G", "C"): -4, ("E", "N"): -1, ("Y", "T"): -2, ("Y", "P"): -4,
+    ("T", "K"): -1, ("A", "A"): 5, ("P", "Q"): -2, ("T", "C"): -1,
+    ("V", "H"): -4, ("T", "G"): -2, ("I", "Q"): -3, ("Z", "T"): -1,
+    ("C", "R"): -4, ("V", "P"): -3, ("P", "E"): -2, ("M", "C"): -2,
+    ("K", "N"): 0, ("I", "I"): 5, ("P", "A"): -1, ("M", "G"): -4,
+    ("T", "S"): 1, ("I", "E"): -4, ("P", "M"): -3, ("M", "K"): -2,
+    ("I", "A"): -2, ("P", "I"): -4, ("R", "R"): 6, ("X", "M"): -1,
+    ("L", "I"): 1, ("X", "I"): -2, ("Z", "B"): 0, ("X", "E"): -1,
+    ("Z", "N"): 0, ("X", "A"): -1, ("B", "R"): -2, ("B", "N"): 4,
+    ("F", "D"): -4, ("X", "Y"): -2, ("Z", "R"): 0, ("F", "H"): -2,
+    ("B", "F"): -4, ("F", "L"): 0, ("X", "Q"): -1, ("B", "B"): 4
+}
+
+
+# http://www.embl-heidelberg.de/~vogt/matrices/blosum85.cmp
+blosum85 = {
+    ("W", "F"): 0, ("L", "R"): -3, ("S", "P"): -1, ("V", "T"): 0,
+    ("Q", "Q"): 6, ("N", "A"): -2, ("Z", "Y"): -3, ("W", "R"): -4,
+    ("Q", "A"): -1, ("S", "D"): -1, ("H", "H"): 8, ("S", "H"): -1,
+    ("H", "D"): -2, ("L", "N"): -4, ("W", "A"): -3, ("Y", "M"): -2,
+    ("G", "R"): -3, ("Y", "I"): -2, ("Y", "E"): -4, ("B", "Y"): -4,
+    ("Y", "A"): -3, ("V", "D"): -4, ("B", "S"): 0, ("Y", "Y"): 7,
+    ("G", "N"): -1, ("E", "C"): -5, ("Y", "Q"): -2, ("Z", "Z"): 4,
+    ("V", "A"): -1, ("C", "C"): 9, ("M", "R"): -2, ("V", "E"): -3,
+    ("T", "N"): 0, ("P", "P"): 8, ("V", "I"): 3, ("V", "S"): -2,
+    ("Z", "P"): -2, ("V", "M"): 0, ("T", "F"): -3, ("V", "Q"): -3,
+    ("K", "K"): 6, ("P", "D"): -2, ("I", "H"): -4, ("I", "D"): -5,
+    ("T", "R"): -2, ("P", "L"): -4, ("K", "G"): -2, ("M", "N"): -3,
+    ("P", "H"): -3, ("F", "Q"): -4, ("Z", "G"): -3, ("X", "L"): -2,
+    ("T", "M"): -1, ("Z", "C"): -5, ("X", "H"): -2, ("D", "R"): -2,
+    ("B", "W"): -5, ("X", "D"): -2, ("Z", "K"): 1, ("F", "A"): -3,
+    ("Z", "W"): -4, ("F", "E"): -4, ("D", "N"): 1, ("B", "K"): -1,
+    ("X", "X"): -2, ("F", "I"): -1, ("B", "G"): -1, ("X", "T"): -1,
+    ("F", "M"): -1, ("B", "C"): -4, ("Z", "I"): -4, ("Z", "V"): -3,
+    ("S", "S"): 5, ("L", "Q"): -3, ("W", "E"): -4, ("Q", "R"): 1,
+    ("N", "N"): 7, ("W", "M"): -2, ("Q", "C"): -4, ("W", "I"): -3,
+    ("S", "C"): -2, ("L", "A"): -2, ("S", "G"): -1, ("L", "E"): -4,
+    ("W", "Q"): -3, ("H", "G"): -3, ("S", "K"): -1, ("Q", "N"): 0,
+    ("N", "R"): -1, ("H", "C"): -5, ("Y", "N"): -3, ("G", "Q"): -3,
+    ("Y", "F"): 3, ("C", "A"): -1, ("V", "L"): 0, ("G", "E"): -3,
+    ("G", "A"): 0, ("K", "R"): 2, ("E", "D"): 1, ("Y", "R"): -3,
+    ("M", "Q"): 0, ("T", "I"): -1, ("C", "D"): -5, ("V", "F"): -1,
+    ("T", "A"): 0, ("T", "P"): -2, ("B", "P"): -3, ("T", "E"): -1,
+    ("V", "N"): -4, ("P", "G"): -3, ("M", "A"): -2, ("K", "H"): -1,
+    ("V", "R"): -3, ("P", "C"): -4, ("M", "E"): -3, ("K", "L"): -3,
+    ("V", "V"): 5, ("M", "I"): 1, ("T", "Q"): -1, ("I", "G"): -5,
+    ("P", "K"): -2, ("M", "M"): 7, ("K", "D"): -1, ("I", "C"): -2,
+    ("Z", "D"): 1, ("F", "R"): -4, ("X", "K"): -1, ("Q", "D"): -1,
+    ("X", "G"): -2, ("Z", "L"): -4, ("X", "C"): -3, ("Z", "H"): 0,
+    ("B", "L"): -5, ("B", "H"): -1, ("F", "F"): 7, ("X", "W"): -3,
+    ("B", "D"): 4, ("D", "A"): -2, ("S", "L"): -3, ("X", "S"): -1,
+    ("F", "N"): -4, ("S", "R"): -1, ("W", "D"): -6, ("V", "Y"): -2,
+    ("W", "L"): -3, ("H", "R"): 0, ("W", "H"): -3, ("H", "N"): 0,
+    ("W", "T"): -4, ("T", "T"): 5, ("S", "F"): -3, ("W", "P"): -5,
+    ("L", "D"): -5, ("B", "I"): -5, ("L", "H"): -3, ("S", "N"): 0,
+    ("B", "T"): -1, ("L", "L"): 4, ("Y", "K"): -3, ("E", "Q"): 2,
+    ("Y", "G"): -5, ("Z", "S"): -1, ("Y", "C"): -3, ("G", "D"): -2,
+    ("B", "V"): -4, ("E", "A"): -1, ("Y", "W"): 2, ("E", "E"): 6,
+    ("Y", "S"): -2, ("C", "N"): -4, ("V", "C"): -1, ("T", "H"): -2,
+    ("P", "R"): -2, ("V", "G"): -4, ("T", "L"): -2, ("V", "K"): -3,
+    ("K", "Q"): 1, ("R", "A"): -2, ("I", "R"): -4, ("T", "D"): -2,
+    ("P", "F"): -4, ("I", "N"): -4, ("K", "I"): -3, ("M", "D"): -4,
+    ("V", "W"): -3, ("W", "W"): 11, ("M", "H"): -3, ("P", "N"): -3,
+    ("K", "A"): -1, ("M", "L"): 2, ("K", "E"): 0, ("Z", "E"): 4,
+    ("X", "N"): -2, ("Z", "A"): -1, ("Z", "M"): -2, ("X", "F"): -2,
+    ("K", "C"): -4, ("B", "Q"): -1, ("X", "B"): -2, ("B", "M"): -4,
+    ("F", "C"): -3, ("Z", "Q"): 4, ("X", "Z"): -1, ("F", "G"): -4,
+    ("B", "E"): 0, ("X", "V"): -1, ("F", "K"): -4, ("B", "A"): -2,
+    ("X", "R"): -2, ("D", "D"): 7, ("W", "G"): -4, ("Z", "F"): -4,
+    ("S", "Q"): -1, ("W", "C"): -4, ("W", "K"): -5, ("H", "Q"): 1,
+    ("L", "C"): -2, ("W", "N"): -5, ("S", "A"): 1, ("L", "G"): -5,
+    ("W", "S"): -4, ("S", "E"): -1, ("H", "E"): -1, ("S", "I"): -3,
+    ("H", "A"): -2, ("S", "M"): -2, ("Y", "L"): -2, ("Y", "H"): 2,
+    ("Y", "D"): -4, ("E", "R"): -1, ("X", "P"): -2, ("G", "G"): 6,
+    ("G", "C"): -4, ("E", "N"): -1, ("Y", "T"): -2, ("Y", "P"): -4,
+    ("T", "K"): -1, ("A", "A"): 5, ("P", "Q"): -2, ("T", "C"): -2,
+    ("V", "H"): -4, ("T", "G"): -2, ("I", "Q"): -4, ("Z", "T"): -1,
+    ("C", "R"): -4, ("V", "P"): -3, ("P", "E"): -2, ("M", "C"): -2,
+    ("K", "N"): 0, ("I", "I"): 5, ("P", "A"): -1, ("M", "G"): -4,
+    ("T", "S"): 1, ("I", "E"): -4, ("P", "M"): -3, ("M", "K"): -2,
+    ("I", "A"): -2, ("P", "I"): -4, ("R", "R"): 6, ("X", "M"): -1,
+    ("L", "I"): 1, ("X", "I"): -2, ("Z", "B"): 0, ("X", "E"): -1,
+    ("Z", "N"): -1, ("X", "A"): -1, ("B", "R"): -2, ("B", "N"): 4,
+    ("F", "D"): -4, ("X", "Y"): -2, ("Z", "R"): 0, ("F", "H"): -2,
+    ("B", "F"): -4, ("F", "L"): 0, ("X", "Q"): -1, ("B", "B"): 4
+}
+
+
+# http://www.embl-heidelberg.de/~vogt/matrices/blosum90.cmp
+blosum90 = {
+    ("W", "F"): 0, ("L", "R"): -3, ("S", "P"): -2, ("V", "T"): -1,
+    ("Q", "Q"): 7, ("N", "A"): -2, ("Z", "Y"): -3, ("W", "R"): -4,
+    ("Q", "A"): -1, ("S", "D"): -1, ("H", "H"): 8, ("S", "H"): -2,
+    ("H", "D"): -2, ("L", "N"): -4, ("W", "A"): -4, ("Y", "M"): -2,
+    ("G", "R"): -3, ("Y", "I"): -2, ("Y", "E"): -4, ("B", "Y"): -4,
+    ("Y", "A"): -3, ("V", "D"): -5, ("B", "S"): 0, ("Y", "Y"): 8,
+    ("G", "N"): -1, ("E", "C"): -6, ("Y", "Q"): -3, ("Z", "Z"): 4,
+    ("V", "A"): -1, ("C", "C"): 9, ("M", "R"): -2, ("V", "E"): -3,
+    ("T", "N"): 0, ("P", "P"): 8, ("V", "I"): 3, ("V", "S"): -2,
+    ("Z", "P"): -2, ("V", "M"): 0, ("T", "F"): -3, ("V", "Q"): -3,
+    ("K", "K"): 6, ("P", "D"): -3, ("I", "H"): -4, ("I", "D"): -5,
+    ("T", "R"): -2, ("P", "L"): -4, ("K", "G"): -2, ("M", "N"): -3,
+    ("P", "H"): -3, ("F", "Q"): -4, ("Z", "G"): -3, ("X", "L"): -2,
+    ("T", "M"): -1, ("Z", "C"): -5, ("X", "H"): -2, ("D", "R"): -3,
+    ("B", "W"): -6, ("X", "D"): -2, ("Z", "K"): 1, ("F", "A"): -3,
+    ("Z", "W"): -4, ("F", "E"): -5, ("D", "N"): 1, ("B", "K"): -1,
+    ("X", "X"): -2, ("F", "I"): -1, ("B", "G"): -2, ("X", "T"): -1,
+    ("F", "M"): -1, ("B", "C"): -4, ("Z", "I"): -4, ("Z", "V"): -3,
+    ("S", "S"): 5, ("L", "Q"): -3, ("W", "E"): -5, ("Q", "R"): 1,
+    ("N", "N"): 7, ("W", "M"): -2, ("Q", "C"): -4, ("W", "I"): -4,
+    ("S", "C"): -2, ("L", "A"): -2, ("S", "G"): -1, ("L", "E"): -4,
+    ("W", "Q"): -3, ("H", "G"): -3, ("S", "K"): -1, ("Q", "N"): 0,
+    ("N", "R"): -1, ("H", "C"): -5, ("Y", "N"): -3, ("G", "Q"): -3,
+    ("Y", "F"): 3, ("C", "A"): -1, ("V", "L"): 0, ("G", "E"): -3,
+    ("G", "A"): 0, ("K", "R"): 2, ("E", "D"): 1, ("Y", "R"): -3,
+    ("M", "Q"): 0, ("T", "I"): -1, ("C", "D"): -5, ("V", "F"): -2,
+    ("T", "A"): 0, ("T", "P"): -2, ("B", "P"): -3, ("T", "E"): -1,
+    ("V", "N"): -4, ("P", "G"): -3, ("M", "A"): -2, ("K", "H"): -1,
+    ("V", "R"): -3, ("P", "C"): -4, ("M", "E"): -3, ("K", "L"): -3,
+    ("V", "V"): 5, ("M", "I"): 1, ("T", "Q"): -1, ("I", "G"): -5,
+    ("P", "K"): -2, ("M", "M"): 7, ("K", "D"): -1, ("I", "C"): -2,
+    ("Z", "D"): 0, ("F", "R"): -4, ("X", "K"): -1, ("Q", "D"): -1,
+    ("X", "G"): -2, ("Z", "L"): -4, ("X", "C"): -3, ("Z", "H"): 0,
+    ("B", "L"): -5, ("B", "H"): -1, ("F", "F"): 7, ("X", "W"): -3,
+    ("B", "D"): 4, ("D", "A"): -3, ("S", "L"): -3, ("X", "S"): -1,
+    ("F", "N"): -4, ("S", "R"): -1, ("W", "D"): -6, ("V", "Y"): -3,
+    ("W", "L"): -3, ("H", "R"): 0, ("W", "H"): -3, ("H", "N"): 0,
+    ("W", "T"): -4, ("T", "T"): 6, ("S", "F"): -3, ("W", "P"): -5,
+    ("L", "D"): -5, ("B", "I"): -5, ("L", "H"): -4, ("S", "N"): 0,
+    ("B", "T"): -1, ("L", "L"): 5, ("Y", "K"): -3, ("E", "Q"): 2,
+    ("Y", "G"): -5, ("Z", "S"): -1, ("Y", "C"): -4, ("G", "D"): -2,
+    ("B", "V"): -4, ("E", "A"): -1, ("Y", "W"): 2, ("E", "E"): 6,
+    ("Y", "S"): -3, ("C", "N"): -4, ("V", "C"): -2, ("T", "H"): -2,
+    ("P", "R"): -3, ("V", "G"): -5, ("T", "L"): -2, ("V", "K"): -3,
+    ("K", "Q"): 1, ("R", "A"): -2, ("I", "R"): -4, ("T", "D"): -2,
+    ("P", "F"): -4, ("I", "N"): -4, ("K", "I"): -4, ("M", "D"): -4,
+    ("V", "W"): -3, ("W", "W"): 11, ("M", "H"): -3, ("P", "N"): -3,
+    ("K", "A"): -1, ("M", "L"): 2, ("K", "E"): 0, ("Z", "E"): 4,
+    ("X", "N"): -2, ("Z", "A"): -1, ("Z", "M"): -2, ("X", "F"): -2,
+    ("K", "C"): -4, ("B", "Q"): -1, ("X", "B"): -2, ("B", "M"): -4,
+    ("F", "C"): -3, ("Z", "Q"): 4, ("X", "Z"): -1, ("F", "G"): -5,
+    ("B", "E"): 0, ("X", "V"): -2, ("F", "K"): -4, ("B", "A"): -2,
+    ("X", "R"): -2, ("D", "D"): 7, ("W", "G"): -4, ("Z", "F"): -4,
+    ("S", "Q"): -1, ("W", "C"): -4, ("W", "K"): -5, ("H", "Q"): 1,
+    ("L", "C"): -2, ("W", "N"): -5, ("S", "A"): 1, ("L", "G"): -5,
+    ("W", "S"): -4, ("S", "E"): -1, ("H", "E"): -1, ("S", "I"): -3,
+    ("H", "A"): -2, ("S", "M"): -2, ("Y", "L"): -2, ("Y", "H"): 1,
+    ("Y", "D"): -4, ("E", "R"): -1, ("X", "P"): -2, ("G", "G"): 6,
+    ("G", "C"): -4, ("E", "N"): -1, ("Y", "T"): -2, ("Y", "P"): -4,
+    ("T", "K"): -1, ("A", "A"): 5, ("P", "Q"): -2, ("T", "C"): -2,
+    ("V", "H"): -4, ("T", "G"): -3, ("I", "Q"): -4, ("Z", "T"): -1,
+    ("C", "R"): -5, ("V", "P"): -3, ("P", "E"): -2, ("M", "C"): -2,
+    ("K", "N"): 0, ("I", "I"): 5, ("P", "A"): -1, ("M", "G"): -4,
+    ("T", "S"): 1, ("I", "E"): -4, ("P", "M"): -3, ("M", "K"): -2,
+    ("I", "A"): -2, ("P", "I"): -4, ("R", "R"): 6, ("X", "M"): -1,
+    ("L", "I"): 1, ("X", "I"): -2, ("Z", "B"): 0, ("X", "E"): -2,
+    ("Z", "N"): -1, ("X", "A"): -1, ("B", "R"): -2, ("B", "N"): 4,
+    ("F", "D"): -5, ("X", "Y"): -2, ("Z", "R"): 0, ("F", "H"): -2,
+    ("B", "F"): -4, ("F", "L"): 0, ("X", "Q"): -1, ("B", "B"): 4
+}
+
+
+# http://www.embl-heidelberg.de/~vogt/matrices/blosum95.cmp
+blosum95 = {
+    ("W", "F"): 0, ("L", "R"): -3, ("S", "P"): -2, ("V", "T"): -1,
+    ("Q", "Q"): 7, ("N", "A"): -2, ("Z", "Y"): -4, ("W", "R"): -4,
+    ("Q", "A"): -1, ("S", "D"): -1, ("H", "H"): 9, ("S", "H"): -2,
+    ("H", "D"): -2, ("L", "N"): -5, ("W", "A"): -4, ("Y", "M"): -3,
+    ("G", "R"): -4, ("Y", "I"): -2, ("Y", "E"): -4, ("B", "Y"): -4,
+    ("Y", "A"): -3, ("V", "D"): -5, ("B", "S"): -1, ("Y", "Y"): 8,
+    ("G", "N"): -1, ("E", "C"): -6, ("Y", "Q"): -3, ("Z", "Z"): 4,
+    ("V", "A"): -1, ("C", "C"): 9, ("M", "R"): -2, ("V", "E"): -3,
+    ("T", "N"): -1, ("P", "P"): 8, ("V", "I"): 3, ("V", "S"): -3,
+    ("Z", "P"): -2, ("V", "M"): 0, ("T", "F"): -3, ("V", "Q"): -3,
+    ("K", "K"): 6, ("P", "D"): -3, ("I", "H"): -4, ("I", "D"): -5,
+    ("T", "R"): -2, ("P", "L"): -4, ("K", "G"): -3, ("M", "N"): -3,
+    ("P", "H"): -3, ("F", "Q"): -4, ("Z", "G"): -3, ("X", "L"): -2,
+    ("T", "M"): -1, ("Z", "C"): -5, ("X", "H"): -2, ("D", "R"): -3,
+    ("B", "W"): -6, ("X", "D"): -2, ("Z", "K"): 0, ("F", "A"): -3,
+    ("Z", "W"): -4, ("F", "E"): -5, ("D", "N"): 1, ("B", "K"): -1,
+    ("X", "X"): -2, ("F", "I"): -1, ("B", "G"): -2, ("X", "T"): -1,
+    ("F", "M"): -1, ("B", "C"): -4, ("Z", "I"): -4, ("Z", "V"): -3,
+    ("S", "S"): 5, ("L", "Q"): -3, ("W", "E"): -5, ("Q", "R"): 0,
+    ("N", "N"): 7, ("W", "M"): -2, ("Q", "C"): -4, ("W", "I"): -4,
+    ("S", "C"): -2, ("L", "A"): -2, ("S", "G"): -1, ("L", "E"): -4,
+    ("W", "Q"): -3, ("H", "G"): -3, ("S", "K"): -1, ("Q", "N"): 0,
+    ("N", "R"): -1, ("H", "C"): -5, ("Y", "N"): -3, ("G", "Q"): -3,
+    ("Y", "F"): 3, ("C", "A"): -1, ("V", "L"): 0, ("G", "E"): -3,
+    ("G", "A"): -1, ("K", "R"): 2, ("E", "D"): 1, ("Y", "R"): -3,
+    ("M", "Q"): -1, ("T", "I"): -2, ("C", "D"): -5, ("V", "F"): -2,
+    ("T", "A"): 0, ("T", "P"): -2, ("B", "P"): -3, ("T", "E"): -2,
+    ("V", "N"): -4, ("P", "G"): -4, ("M", "A"): -2, ("K", "H"): -1,
+    ("V", "R"): -4, ("P", "C"): -5, ("M", "E"): -3, ("K", "L"): -3,
+    ("V", "V"): 5, ("M", "I"): 1, ("T", "Q"): -1, ("I", "G"): -6,
+    ("P", "K"): -2, ("M", "M"): 7, ("K", "D"): -2, ("I", "C"): -2,
+    ("Z", "D"): 0, ("F", "R"): -4, ("X", "K"): -1, ("Q", "D"): -1,
+    ("X", "G"): -3, ("Z", "L"): -4, ("X", "C"): -3, ("Z", "H"): 0,
+    ("B", "L"): -5, ("B", "H"): -1, ("F", "F"): 7, ("X", "W"): -4,
+    ("B", "D"): 4, ("D", "A"): -3, ("S", "L"): -3, ("X", "S"): -1,
+    ("F", "N"): -4, ("S", "R"): -2, ("W", "D"): -6, ("V", "Y"): -3,
+    ("W", "L"): -3, ("H", "R"): -1, ("W", "H"): -3, ("H", "N"): 0,
+    ("W", "T"): -4, ("T", "T"): 6, ("S", "F"): -3, ("W", "P"): -5,
+    ("L", "D"): -5, ("B", "I"): -5, ("L", "H"): -4, ("S", "N"): 0,
+    ("B", "T"): -1, ("L", "L"): 5, ("Y", "K"): -3, ("E", "Q"): 2,
+    ("Y", "G"): -5, ("Z", "S"): -1, ("Y", "C"): -4, ("G", "D"): -2,
+    ("B", "V"): -5, ("E", "A"): -1, ("Y", "W"): 2, ("E", "E"): 6,
+    ("Y", "S"): -3, ("C", "N"): -4, ("V", "C"): -2, ("T", "H"): -2,
+    ("P", "R"): -3, ("V", "G"): -5, ("T", "L"): -2, ("V", "K"): -3,
+    ("K", "Q"): 1, ("R", "A"): -2, ("I", "R"): -4, ("T", "D"): -2,
+    ("P", "F"): -5, ("I", "N"): -4, ("K", "I"): -4, ("M", "D"): -5,
+    ("V", "W"): -3, ("W", "W"): 11, ("M", "H"): -3, ("P", "N"): -3,
+    ("K", "A"): -1, ("M", "L"): 2, ("K", "E"): 0, ("Z", "E"): 4,
+    ("X", "N"): -2, ("Z", "A"): -1, ("Z", "M"): -2, ("X", "F"): -2,
+    ("K", "C"): -5, ("B", "Q"): -1, ("X", "B"): -2, ("B", "M"): -4,
+    ("F", "C"): -3, ("Z", "Q"): 4, ("X", "Z"): -1, ("F", "G"): -5,
+    ("B", "E"): 0, ("X", "V"): -2, ("F", "K"): -4, ("B", "A"): -3,
+    ("X", "R"): -2, ("D", "D"): 7, ("W", "G"): -5, ("Z", "F"): -4,
+    ("S", "Q"): -1, ("W", "C"): -4, ("W", "K"): -5, ("H", "Q"): 1,
+    ("L", "C"): -3, ("W", "N"): -5, ("S", "A"): 1, ("L", "G"): -5,
+    ("W", "S"): -4, ("S", "E"): -1, ("H", "E"): -1, ("S", "I"): -3,
+    ("H", "A"): -3, ("S", "M"): -3, ("Y", "L"): -2, ("Y", "H"): 1,
+    ("Y", "D"): -5, ("E", "R"): -1, ("X", "P"): -3, ("G", "G"): 6,
+    ("G", "C"): -5, ("E", "N"): -1, ("Y", "T"): -3, ("Y", "P"): -5,
+    ("T", "K"): -1, ("A", "A"): 5, ("P", "Q"): -2, ("T", "C"): -2,
+    ("V", "H"): -4, ("T", "G"): -3, ("I", "Q"): -4, ("Z", "T"): -2,
+    ("C", "R"): -5, ("V", "P"): -4, ("P", "E"): -2, ("M", "C"): -3,
+    ("K", "N"): 0, ("I", "I"): 5, ("P", "A"): -1, ("M", "G"): -4,
+    ("T", "S"): 1, ("I", "E"): -4, ("P", "M"): -3, ("M", "K"): -2,
+    ("I", "A"): -2, ("P", "I"): -4, ("R", "R"): 7, ("X", "M"): -2,
+    ("L", "I"): 1, ("X", "I"): -2, ("Z", "B"): 0, ("X", "E"): -2,
+    ("Z", "N"): -1, ("X", "A"): -1, ("B", "R"): -2, ("B", "N"): 4,
+    ("F", "D"): -5, ("X", "Y"): -2, ("Z", "R"): -1, ("F", "H"): -2,
+    ("B", "F"): -5, ("F", "L"): 0, ("X", "Q"): -1, ("B", "B"): 4
+}
+
+
+# http://www.embl-heidelberg.de/~vogt/matrices/feng.cmp
+feng = {
+    ("W", "F"): 3, ("L", "R"): 2, ("I", "I"): 6, ("Q", "Q"): 6,
+    ("W", "N"): 0, ("V", "I"): 5, ("H", "T"): 2, ("H", "P"): 3,
+    ("W", "V"): 3, ("Q", "E"): 4, ("W", "R"): 2, ("Q", "A"): 3,
+    ("H", "H"): 6, ("H", "D"): 3, ("L", "N"): 1, ("Y", "M"): 2,
+    ("Y", "I"): 3, ("Y", "E"): 1, ("E", "S"): 3, ("Y", "A"): 2,
+    ("Y", "Y"): 6, ("T", "C"): 2, ("E", "C"): 0, ("Y", "Q"): 2,
+    ("E", "G"): 4, ("V", "A"): 5, ("C", "C"): 6, ("M", "R"): 2,
+    ("P", "T"): 4, ("V", "E"): 4, ("P", "P"): 6, ("I", "T"): 3,
+    ("K", "S"): 3, ("R", "G"): 3, ("I", "P"): 2, ("R", "C"): 2,
+    ("A", "T"): 5, ("K", "K"): 6, ("A", "P"): 5, ("V", "M"): 4,
+    ("I", "D"): 1, ("K", "C"): 0, ("K", "G"): 2, ("R", "S"): 3,
+    ("F", "Q"): 1, ("F", "A"): 2, ("V", "V"): 6, ("M", "N"): 1,
+    ("F", "E"): 0, ("D", "N"): 5, ("F", "I"): 4, ("F", "M"): 2,
+    ("M", "S"): 1, ("S", "S"): 6, ("L", "Q"): 2, ("W", "E"): 1,
+    ("W", "A"): 2, ("W", "M"): 3, ("H", "S"): 3, ("W", "I"): 2,
+    ("S", "C"): 4, ("L", "A"): 2, ("L", "E"): 1, ("W", "Q"): 1,
+    ("H", "G"): 1, ("Q", "N"): 3, ("H", "C"): 2, ("L", "M"): 5,
+    ("W", "Y"): 3, ("Y", "N"): 3, ("E", "P"): 3, ("Y", "F"): 5,
+    ("E", "T"): 3, ("A", "A"): 6, ("I", "N"): 2, ("G", "A"): 5,
+    ("Y", "V"): 3, ("E", "D"): 5, ("W", "H"): 1, ("Y", "R"): 1,
+    ("M", "Q"): 2, ("P", "S"): 4, ("R", "H"): 4, ("A", "C"): 2,
+    ("R", "D"): 2, ("K", "P"): 2, ("L", "D"): 1, ("K", "T"): 4,
+    ("V", "N"): 2, ("M", "A"): 2, ("K", "H"): 3, ("V", "R"): 2,
+    ("P", "C"): 2, ("M", "E"): 1, ("A", "S"): 5, ("T", "T"): 6,
+    ("R", "T"): 3, ("I", "G"): 2, ("R", "P"): 3, ("K", "D"): 3,
+    ("I", "C"): 2, ("F", "R"): 1, ("F", "V"): 4, ("L", "C"): 2,
+    ("F", "F"): 6, ("D", "A"): 4, ("F", "N"): 1, ("W", "D"): 0,
+    ("L", "P"): 3, ("Q", "S"): 3, ("N", "C"): 2, ("N", "G"): 3,
+    ("H", "N"): 4, ("W", "T"): 1, ("Q", "G"): 2, ("W", "P"): 2,
+    ("Q", "C"): 1, ("N", "S"): 5, ("L", "H"): 3, ("L", "L"): 6,
+    ("G", "T"): 2, ("M", "M"): 6, ("G", "P"): 3, ("Y", "K"): 1,
+    ("Y", "G"): 2, ("Y", "C"): 3, ("E", "A"): 4, ("E", "E"): 6,
+    ("Y", "S"): 3, ("M", "P"): 2, ("V", "C"): 2, ("M", "T"): 3,
+    ("V", "G"): 4, ("R", "E"): 2, ("V", "K"): 3, ("K", "Q"): 4,
+    ("R", "A"): 2, ("I", "R"): 2, ("N", "A"): 3, ("V", "S"): 2,
+    ("M", "D"): 0, ("M", "H"): 1, ("K", "A"): 3, ("R", "Q"): 3,
+    ("K", "E"): 4, ("F", "S"): 3, ("I", "K"): 2, ("D", "P"): 2,
+    ("D", "T"): 2, ("I", "M"): 4, ("F", "C"): 3, ("W", "L"): 4,
+    ("F", "G"): 1, ("F", "K"): 0, ("F", "T"): 1, ("D", "D"): 6,
+    ("Q", "T"): 3, ("W", "G"): 3, ("Q", "P"): 3, ("W", "C"): 3,
+    ("W", "K"): 1, ("H", "Q"): 4, ("Q", "D"): 4, ("W", "W"): 6,
+    ("V", "L"): 5, ("L", "G"): 2, ("W", "S"): 2, ("L", "K"): 2,
+    ("N", "P"): 2, ("H", "E"): 2, ("N", "T"): 4, ("H", "A"): 2,
+    ("Y", "L"): 3, ("Y", "H"): 3, ("G", "S"): 5, ("Y", "D"): 2,
+    ("V", "Q"): 2, ("L", "T"): 2, ("G", "G"): 6, ("G", "C"): 3,
+    ("E", "N"): 3, ("Y", "T"): 2, ("Y", "P"): 2, ("R", "N"): 2,
+    ("V", "D"): 3, ("K", "R"): 5, ("V", "H"): 1, ("I", "Q"): 1,
+    ("V", "P"): 3, ("M", "C"): 2, ("K", "N"): 4, ("V", "T"): 3,
+    ("M", "G"): 1, ("T", "S"): 5, ("I", "E"): 1, ("M", "K"): 2,
+    ("I", "A"): 2, ("N", "N"): 6, ("R", "R"): 6, ("F", "P"): 2,
+    ("L", "I"): 5, ("I", "S"): 2, ("D", "S"): 3, ("L", "S"): 2,
+    ("I", "H"): 1, ("F", "D"): 1, ("D", "C"): 1, ("F", "H"): 2,
+    ("D", "G"): 4, ("F", "L"): 4
+}
+
+
+# http://www.embl-heidelberg.de/~vogt/matrices/fitch.cmp
+fitch = {
+    ("W", "F"): 2, ("U", "I"): 1, ("W", "T"): 0, ("R", "R"): 3,
+    ("N", "M"): 1, ("U", "M"): 2, ("U", "O"): 1, ("N", "A"): 2,
+    ("U", "A"): 1, ("N", "C"): 1, ("U", "C"): 1, ("N", "E"): 2,
+    ("U", "E"): 1, ("W", "H"): 1, ("W", "R"): 1, ("W", "V"): 2,
+    ("Q", "E"): 1, ("S", "S"): 3, ("T", "T"): 3, ("S", "F"): 2,
+    ("Q", "A"): 1, ("Q", "C"): 1, ("H", "F"): 1, ("S", "H"): 1,
+    ("L", "H"): 1, ("S", "N"): 2, ("Q", "I"): 2, ("S", "L"): 2,
+    ("L", "L"): 3, ("Y", "M"): 1, ("M", "M"): 3, ("Y", "I"): 1,
+    ("W", "N"): 2, ("Y", "R"): 2, ("Y", "E"): 1, ("O", "O"): 3,
+    ("Y", "A"): 2, ("Y", "C"): 2, ("U", "S"): 2, ("Y", "U"): 2,
+    ("Q", "Q"): 3, ("E", "A"): 1, ("N", "L"): 1, ("E", "C"): 1,
+    ("W", "W"): 3, ("E", "E"): 3, ("Y", "S"): 2, ("V", "A"): 2,
+    ("C", "C"): 3, ("V", "C"): 2, ("T", "H"): 0, ("V", "E"): 1,
+    ("T", "N"): 0, ("O", "I"): 1, ("R", "E"): 2, ("O", "E"): 1,
+    ("R", "A"): 1, ("O", "C"): 2, ("R", "C"): 2, ("O", "A"): 2,
+    ("V", "Q"): 1, ("V", "M"): 2, ("V", "S"): 1, ("Q", "O"): 2,
+    ("I", "H"): 1, ("M", "F"): 1, ("R", "Q"): 2, ("M", "H"): 0,
+    ("O", "F"): 2, ("T", "F"): 1, ("M", "I"): 2, ("M", "L"): 1,
+    ("V", "T"): 1, ("Q", "H"): 2, ("Q", "N"): 1, ("T", "M"): 1,
+    ("W", "M"): 2, ("Y", "Y"): 3, ("T", "S"): 2, ("R", "O"): 1,
+    ("T", "R"): 2, ("H", "H"): 3, ("F", "A"): 1, ("F", "C"): 2,
+    ("F", "E"): 1, ("W", "L"): 1, ("T", "L"): 1, ("U", "R"): 2,
+    ("U", "H"): 2, ("N", "H"): 2, ("U", "Q"): 2, ("W", "E"): 2,
+    ("U", "L"): 1, ("W", "C"): 1, ("U", "N"): 1, ("W", "A"): 1,
+    ("W", "O"): 1, ("I", "F"): 0, ("U", "F"): 2, ("N", "F"): 1,
+    ("L", "C"): 1, ("S", "C"): 2, ("L", "A"): 2, ("S", "A"): 1,
+    ("W", "S"): 2, ("L", "E"): 2, ("S", "E"): 2, ("Q", "L"): 1,
+    ("H", "E"): 1, ("S", "I"): 1, ("H", "C"): 1, ("S", "O"): 2,
+    ("H", "A"): 2, ("S", "M"): 1, ("Y", "L"): 2, ("Y", "N"): 1,
+    ("Y", "H"): 1, ("O", "M"): 0, ("Y", "Q"): 1, ("Y", "F"): 1,
+    ("W", "I"): 1, ("C", "A"): 1, ("R", "I"): 2, ("Y", "O"): 1,
+    ("Q", "M"): 1, ("S", "Q"): 2, ("U", "T"): 2, ("Y", "T"): 2,
+    ("Y", "V"): 2, ("O", "L"): 1, ("R", "N"): 1, ("A", "A"): 3,
+    ("N", "I"): 2, ("R", "L"): 1, ("T", "I"): 1, ("L", "F"): 1,
+    ("T", "O"): 1, ("R", "H"): 2, ("O", "H"): 2, ("V", "F"): 2,
+    ("T", "C"): 2, ("V", "H"): 1, ("T", "A"): 0, ("R", "F"): 1,
+    ("V", "L"): 2, ("T", "E"): 1, ("V", "N"): 1, ("M", "A"): 0,
+    ("Q", "F"): 1, ("M", "C"): 0, ("W", "U"): 2, ("I", "I"): 3,
+    ("V", "V"): 3, ("O", "N"): 2, ("I", "E"): 2, ("T", "Q"): 2,
+    ("I", "A"): 1, ("N", "N"): 3, ("I", "C"): 0, ("S", "R"): 2,
+    ("V", "R"): 1, ("L", "I"): 1, ("V", "I"): 1, ("R", "M"): 2,
+    ("Y", "W"): 1, ("M", "E"): 2, ("V", "U"): 2, ("W", "Q"): 0,
+    ("U", "U"): 3, ("V", "O"): 1, ("F", "F"): 3
+}
+
+
+# http://www.embl-heidelberg.de/~vogt/matrices/genetic.cmp
+genetic = {
+    ("W", "F"): 0.0, ("L", "R"): -0.4, ("I", "I"): 4.1, ("Q", "Q"): 5.5,
+    ("W", "N"): -3.0, ("V", "I"): 1.0, ("H", "T"): -1.8, ("H", "P"): 0.7,
+    ("W", "V"): -2.1, ("Q", "E"): 2.0, ("W", "R"): 1.8, ("Q", "A"): -2.1,
+    ("H", "H"): 4.7, ("H", "D"): 1.7, ("L", "N"): -2.2, ("Y", "M"): -2.9,
+    ("Y", "I"): -1.6, ("Y", "E"): -0.9, ("E", "S"): -2.8, ("Y", "A"): -2.4,
+    ("Y", "Y"): 6.5, ("T", "C"): -1.9, ("E", "C"): -3.0, ("Y", "Q"): -0.8,
+    ("E", "G"): 1.4, ("V", "A"): 1.0, ("C", "C"): 5.5, ("M", "R"): -0.4,
+    ("P", "T"): 1.1, ("V", "E"): 1.3, ("P", "P"): 3.8, ("I", "T"): 0.8,
+    ("K", "S"): -1.5, ("R", "G"): 0.8, ("I", "P"): -1.6, ("R", "C"): 0.7,
+    ("A", "T"): 0.9, ("K", "K"): 5.6, ("A", "P"): 0.8, ("V", "M"): 1.0,
+    ("I", "D"): -2.1, ("K", "C"): -3.2, ("K", "G"): -2.2, ("R", "S"): 0.3,
+    ("F", "Q"): -2.1, ("F", "A"): -2.4, ("V", "V"): 4.1, ("M", "N"): 0.1,
+    ("F", "E"): -2.9, ("D", "N"): 1.7, ("F", "I"): 1.3, ("F", "M"): 0.5,
+    ("M", "S"): -1.3, ("S", "S"): 2.6, ("L", "Q"): 0.1, ("W", "E"): -3.2,
+    ("W", "A"): -2.2, ("W", "M"): -2.0, ("H", "S"): -1.6, ("W", "I"): -2.2,
+    ("S", "C"): 1.5, ("L", "A"): -2.3, ("L", "E"): -2.5, ("W", "Q"): -2.3,
+    ("H", "G"): -2.2, ("Q", "N"): 0.4, ("H", "C"): -1.6, ("L", "M"): 1.5,
+    ("W", "Y"): -0.5, ("Y", "N"): 2.5, ("E", "P"): -2.1, ("Y", "F"): 2.0,
+    ("E", "T"): -2.1, ("A", "A"): 4.0, ("I", "N"): 0.9, ("G", "A"): 1.2,
+    ("Y", "V"): -2.2, ("E", "D"): 3.8, ("W", "H"): -2.1, ("Y", "R"): -1.9,
+    ("M", "Q"): -1.2, ("P", "S"): 0.4, ("R", "H"): 3.6, ("A", "C"): -1.9,
+    ("R", "D"): -2.3, ("K", "P"): -1.5, ("L", "D"): -2.4, ("K", "T"): 1.0,
+    ("V", "N"): -2.2, ("M", "A"): -2.0, ("K", "H"): 0.6, ("V", "R"): -2.1,
+    ("P", "C"): -1.9, ("M", "E"): -1.8, ("A", "S"): 0.1, ("T", "T"): 4.0,
+    ("R", "T"): -0.6, ("I", "G"): -2.5, ("R", "P"): 0.3, ("K", "D"): 0.3,
+    ("I", "C"): -1.9, ("F", "R"): -1.5, ("F", "V"): 1.0, ("L", "C"): -1.3,
+    ("F", "F"): 4.5, ("D", "A"): 1.0, ("F", "N"): -1.3, ("W", "D"): -2.9,
+    ("L", "P"): 0.0, ("Q", "S"): -2.3, ("N", "C"): -1.5, ("N", "G"): -2.6,
+    ("H", "N"): 1.8, ("W", "T"): -2.2, ("Q", "G"): -2.1, ("W", "P"): -1.6,
+    ("Q", "C"): -3.1, ("N", "S"): -0.3, ("L", "H"): -0.1, ("L", "L"): 3.4,
+    ("G", "T"): -2.1, ("M", "M"): 5.4, ("G", "P"): -1.8, ("Y", "K"): -0.8,
+    ("Y", "G"): -1.8, ("Y", "C"): 2.6, ("E", "A"): 1.3, ("E", "E"): 5.7,
+    ("Y", "S"): 0.3, ("M", "P"): -1.4, ("V", "C"): -2.2, ("M", "T"): 0.7,
+    ("V", "G"): 1.1, ("R", "E"): -2.0, ("V", "K"): -2.1, ("K", "Q"): 2.2,
+    ("R", "A"): -1.6, ("I", "R"): -1.2, ("N", "A"): -1.7, ("V", "S"): -2.2,
+    ("M", "D"): -2.5, ("M", "H"): -1.8, ("K", "A"): -1.9, ("R", "Q"): 0.3,
+    ("K", "E"): 2.0, ("F", "S"): 0.0, ("I", "K"): 0.7, ("D", "P"): -2.2,
+    ("D", "T"): -2.1, ("I", "M"): 3.3, ("F", "C"): 1.8, ("W", "L"): -0.3,
+    ("F", "G"): -1.9, ("F", "K"): -2.8, ("F", "T"): -2.1, ("D", "D"): 4.8,
+    ("Q", "T"): -1.7, ("W", "G"): 1.4, ("Q", "P"): 1.0, ("W", "C"): 4.1,
+    ("W", "K"): -3.0, ("H", "Q"): 3.6, ("Q", "D"): 0.3, ("W", "W"): 7.5,
+    ("V", "L"): 1.1, ("L", "G"): -2.2, ("W", "S"): 0.8, ("L", "K"): -2.0,
+    ("N", "P"): -1.6, ("H", "E"): 0.3, ("N", "T"): 0.9, ("H", "A"): -2.1,
+    ("Y", "L"): -1.6, ("Y", "H"): 2.3, ("G", "S"): -0.6, ("Y", "D"): 2.3,
+    ("V", "Q"): -2.0, ("L", "T"): -1.9, ("G", "G"): 4.2, ("G", "C"): 1.0,
+    ("E", "N"): 0.3, ("Y", "T"): -2.1, ("Y", "P"): -2.3, ("R", "N"): -1.5,
+    ("V", "D"): 1.0, ("K", "R"): -0.2, ("V", "H"): -2.1, ("I", "Q"): -1.9,
+    ("V", "P"): -2.1, ("M", "C"): -2.7, ("K", "N"): 3.5, ("V", "T"): -2.2,
+    ("M", "G"): -2.3, ("T", "S"): 1.0, ("I", "E"): -2.3, ("M", "K"): 1.6,
+    ("I", "A"): -1.8, ("N", "N"): 4.7, ("R", "R"): 2.9, ("F", "P"): -1.8,
+    ("L", "I"): 1.2, ("I", "S"): -0.5, ("D", "S"): -2.1, ("L", "S"): -1.2,
+    ("I", "H"): -1.8, ("F", "D"): -1.7, ("D", "C"): -1.6, ("F", "H"): -1.1,
+    ("D", "G"): 1.1, ("F", "L"): 2.2
+}
+
+
+# http://www.embl-heidelberg.de/~vogt/matrices/gonnet.cmp
+gonnet = {
+    ("W", "F"): 3.6, ("L", "R"): -2.2, ("I", "I"): 4.0, ("Q", "Q"): 2.7,
+    ("W", "N"): -3.6, ("V", "I"): 3.1, ("H", "T"): -0.3, ("H", "P"): -1.1,
+    ("W", "V"): -2.6, ("Q", "E"): 1.7, ("W", "R"): -1.6, ("Q", "A"): -0.2,
+    ("H", "H"): 6.0, ("H", "D"): 0.4, ("L", "N"): -3.0, ("Y", "M"): -0.2,
+    ("Y", "I"): -0.7, ("Y", "E"): -2.7, ("E", "S"): 0.2, ("Y", "A"): -2.2,
+    ("Y", "Y"): 7.8, ("T", "C"): -0.5, ("E", "C"): -3.0, ("Y", "Q"): -1.7,
+    ("E", "G"): -0.8, ("V", "A"): 0.1, ("C", "C"): 11.5, ("M", "R"): -1.7,
+    ("P", "T"): 0.1, ("V", "E"): -1.9, ("P", "P"): 7.6, ("I", "T"): -0.6,
+    ("K", "S"): 0.1, ("R", "G"): -1.0, ("I", "P"): -2.6, ("R", "C"): -2.2,
+    ("A", "T"): 0.6, ("K", "K"): 3.2, ("A", "P"): 0.3, ("V", "M"): 1.6,
+    ("I", "D"): -3.8, ("K", "C"): -2.8, ("K", "G"): -1.1, ("R", "S"): -0.2,
+    ("F", "Q"): -2.6, ("F", "A"): -2.3, ("V", "V"): 3.4, ("M", "N"): -2.2,
+    ("F", "E"): -3.9, ("D", "N"): 2.2, ("F", "I"): 1.0, ("F", "M"): 1.6,
+    ("M", "S"): -1.4, ("S", "S"): 2.2, ("L", "Q"): -1.6, ("W", "E"): -4.3,
+    ("W", "A"): -3.6, ("W", "M"): -1.0, ("H", "S"): -0.2, ("W", "I"): -1.8,
+    ("S", "C"): 0.1, ("L", "A"): -1.2, ("L", "E"): -2.8, ("W", "Q"): -2.7,
+    ("H", "G"): -1.4, ("Q", "N"): 0.7, ("H", "C"): -1.3, ("L", "M"): 2.8,
+    ("W", "Y"): 4.1, ("Y", "N"): -1.4, ("E", "P"): -0.5, ("Y", "F"): 5.1,
+    ("E", "T"): -0.1, ("A", "A"): 2.4, ("I", "N"): -2.8, ("G", "A"): 0.5,
+    ("Y", "V"): -1.1, ("E", "D"): 2.7, ("W", "H"): -0.8, ("Y", "R"): -1.8,
+    ("M", "Q"): -1.0, ("P", "S"): 0.4, ("R", "H"): 0.6, ("A", "C"): 0.5,
+    ("R", "D"): -0.3, ("K", "P"): -0.6, ("L", "D"): -4.0, ("K", "T"): 0.1,
+    ("V", "N"): -2.2, ("M", "A"): -0.7, ("K", "H"): 0.6, ("V", "R"): -2.0,
+    ("P", "C"): -3.1, ("M", "E"): -2.0, ("A", "S"): 1.1, ("T", "T"): 2.5,
+    ("R", "T"): -0.2, ("I", "G"): -4.5, ("R", "P"): -0.9, ("K", "D"): 0.5,
+    ("I", "C"): -1.1, ("F", "R"): -3.2, ("F", "V"): 0.1, ("L", "C"): -1.5,
+    ("F", "F"): 7.0, ("D", "A"): -0.3, ("F", "N"): -3.1, ("W", "D"): -5.2,
+    ("L", "P"): -2.3, ("Q", "S"): 0.2, ("N", "C"): -1.8, ("N", "G"): 0.4,
+    ("H", "N"): 1.2, ("W", "T"): -3.5, ("Q", "G"): -1.0, ("W", "P"): -5.0,
+    ("Q", "C"): -2.4, ("N", "S"): 0.9, ("L", "H"): -1.9, ("L", "L"): 4.0,
+    ("G", "T"): -1.1, ("M", "M"): 4.3, ("G", "P"): -1.6, ("Y", "K"): -2.1,
+    ("Y", "G"): -4.0, ("Y", "C"): -0.5, ("E", "A"): 0.0, ("E", "E"): 3.6,
+    ("Y", "S"): -1.9, ("M", "P"): -2.4, ("V", "C"): 0.0, ("M", "T"): -0.6,
+    ("V", "G"): -3.3, ("R", "E"): 0.4, ("V", "K"): -1.7, ("K", "Q"): 1.5,
+    ("R", "A"): -0.6, ("I", "R"): -2.4, ("N", "A"): -0.3, ("V", "S"): -1.0,
+    ("M", "D"): -3.0, ("M", "H"): -1.3, ("K", "A"): -0.4, ("R", "Q"): 1.5,
+    ("K", "E"): 1.2, ("F", "S"): -2.8, ("I", "K"): -2.1, ("D", "P"): -0.7,
+    ("D", "T"): 0.0, ("I", "M"): 2.5, ("F", "C"): -0.8, ("W", "L"): -0.7,
+    ("F", "G"): -5.2, ("F", "K"): -3.3, ("F", "T"): -2.2, ("D", "D"): 4.7,
+    ("Q", "T"): 0.0, ("W", "G"): -4.0, ("Q", "P"): -0.2, ("W", "C"): -1.0,
+    ("W", "K"): -3.5, ("H", "Q"): 1.2, ("Q", "D"): 0.9, ("W", "W"): 14.2,
+    ("V", "L"): 1.8, ("L", "G"): -4.4, ("W", "S"): -3.3, ("L", "K"): -2.1,
+    ("N", "P"): -0.9, ("H", "E"): 0.4, ("N", "T"): 0.5, ("H", "A"): -0.8,
+    ("Y", "L"): 0.0, ("Y", "H"): 2.2, ("G", "S"): 0.4, ("Y", "D"): -2.8,
+    ("V", "Q"): -1.5, ("L", "T"): -1.3, ("G", "G"): 6.6, ("G", "C"): -2.0,
+    ("E", "N"): 0.9, ("Y", "T"): -1.9, ("Y", "P"): -3.1, ("R", "N"): 0.3,
+    ("V", "D"): -2.9, ("K", "R"): 2.7, ("V", "H"): -2.0, ("I", "Q"): -1.9,
+    ("V", "P"): -1.8, ("M", "C"): -0.9, ("K", "N"): 0.8, ("V", "T"): 0.0,
+    ("M", "G"): -3.5, ("T", "S"): 1.5, ("I", "E"): -2.7, ("M", "K"): -1.4,
+    ("I", "A"): -0.8, ("N", "N"): 3.8, ("R", "R"): 4.7, ("F", "P"): -3.8,
+    ("L", "I"): 2.8, ("I", "S"): -1.8, ("D", "S"): 0.5, ("L", "S"): -2.1,
+    ("I", "H"): -2.2, ("F", "D"): -4.5, ("D", "C"): -3.2, ("F", "H"): -0.1,
+    ("D", "G"): 0.1, ("F", "L"): 2.0
+}
+
+
+# From https://www.genome.jp/dbget-bin/www_bget?aaindex:GRAR740104,
+# with each Grantham distance subtracted from 215 (similarity = 215 - distance),
+# as per the original reference, now available at
+# https://web.archive.org/web/19991111011852/http://www.embl-heidelberg.de:80/~vogt/matrices/grant.cmp,
+# which was found to have incorrect amino acid characters (issue 308).
+# A conversion sketch follows the matrix below.
+grant = {
+    ("A", "A"): 215, ("C", "A"): 20, ("C", "C"): 215, ("C", "D"): 61,
+    ("C", "N"): 76, ("C", "R"): 35, ("D", "A"): 89, ("D", "D"): 215,
+    ("D", "N"): 192, ("D", "R"): 119, ("E", "A"): 108, ("E", "C"): 45,
+    ("E", "D"): 170, ("E", "E"): 215, ("E", "N"): 173, ("E", "Q"): 186,
+    ("E", "R"): 161, ("F", "A"): 102, ("F", "C"): 10, ("F", "D"): 38,
+    ("F", "E"): 75, ("F", "F"): 215, ("F", "G"): 62, ("F", "H"): 115,
+    ("F", "I"): 194, ("F", "K"): 113, ("F", "L"): 193, ("F", "M"): 187,
+    ("F", "N"): 57, ("F", "Q"): 99, ("F", "R"): 118, ("G", "A"): 155,
+    ("G", "C"): 56, ("G", "D"): 121, ("G", "E"): 117, ("G", "G"): 215,
+    ("G", "N"): 135, ("G", "Q"): 128, ("G", "R"): 90, ("H", "A"): 129,
+    ("H", "C"): 41, ("H", "D"): 134, ("H", "E"): 175, ("H", "G"): 117,
+    ("H", "H"): 215, ("H", "N"): 147, ("H", "Q"): 191, ("H", "R"): 186,
+    ("I", "A"): 121, ("I", "C"): 17, ("I", "D"): 47, ("I", "E"): 81,
+    ("I", "G"): 80, ("I", "H"): 121, ("I", "I"): 215, ("I", "N"): 66,
+    ("I", "Q"): 106, ("I", "R"): 118, ("K", "A"): 109, ("K", "C"): 13,
+    ("K", "D"): 114, ("K", "E"): 159, ("K", "G"): 88, ("K", "H"): 183,
+    ("K", "I"): 113, ("K", "K"): 215, ("K", "L"): 108, ("K", "N"): 121,
+    ("K", "Q"): 162, ("K", "R"): 189, ("L", "A"): 119, ("L", "C"): 17,
+    ("L", "D"): 43, ("L", "E"): 77, ("L", "G"): 77, ("L", "H"): 116,
+    ("L", "I"): 210, ("L", "L"): 215, ("L", "N"): 62, ("L", "Q"): 102,
+    ("L", "R"): 113, ("M", "A"): 131, ("M", "C"): 19, ("M", "D"): 55,
+    ("M", "E"): 89, ("M", "G"): 88, ("M", "H"): 128, ("M", "I"): 205,
+    ("M", "K"): 120, ("M", "L"): 200, ("M", "M"): 215, ("M", "N"): 73,
+    ("M", "Q"): 114, ("M", "R"): 124, ("N", "A"): 104, ("N", "N"): 215,
+    ("N", "R"): 129, ("P", "A"): 188, ("P", "C"): 46, ("P", "D"): 107,
+    ("P", "E"): 122, ("P", "F"): 101, ("P", "G"): 173, ("P", "H"): 138,
+    ("P", "I"): 120, ("P", "K"): 112, ("P", "L"): 117, ("P", "M"): 128,
+    ("P", "N"): 124, ("P", "P"): 215, ("P", "Q"): 139, ("P", "R"): 112,
+    ("Q", "A"): 124, ("Q", "C"): 61, ("Q", "D"): 154, ("Q", "N"): 169,
+    ("Q", "Q"): 215, ("Q", "R"): 172, ("R", "A"): 103, ("R", "R"): 215,
+    ("S", "A"): 116, ("S", "C"): 103, ("S", "D"): 150, ("S", "E"): 135,
+    ("S", "F"): 60, ("S", "G"): 159, ("S", "H"): 126, ("S", "I"): 73,
+    ("S", "K"): 94, ("S", "L"): 70, ("S", "M"): 80, ("S", "N"): 169,
+    ("S", "P"): 141, ("S", "Q"): 147, ("S", "R"): 105, ("S", "S"): 215,
+    ("T", "A"): 157, ("T", "C"): 66, ("T", "D"): 130, ("T", "E"): 150,
+    ("T", "F"): 112, ("T", "G"): 156, ("T", "H"): 168, ("T", "I"): 126,
+    ("T", "K"): 137, ("T", "L"): 123, ("T", "M"): 134, ("T", "N"): 150,
+    ("T", "P"): 177, ("T", "Q"): 173, ("T", "R"): 144, ("T", "S"): 157,
+    ("T", "T"): 215, ("V", "A"): 151, ("V", "C"): 23, ("V", "D"): 63,
+    ("V", "E"): 94, ("V", "F"): 165, ("V", "G"): 106, ("V", "H"): 131,
+    ("V", "I"): 186, ("V", "K"): 118, ("V", "L"): 183, ("V", "M"): 194,
+    ("V", "N"): 82, ("V", "P"): 147, ("V", "Q"): 119, ("V", "R"): 119,
+    ("V", "S"): 91, ("V", "T"): 146, ("V", "V"): 215, ("V", "W"): 127,
+    ("V", "Y"): 160, ("W", "A"): 67, ("W", "C"): 0, ("W", "D"): 34,
+    ("W", "E"): 63, ("W", "F"): 175, ("W", "G"): 31, ("W", "H"): 100,
+    ("W", "I"): 154, ("W", "K"): 105, ("W", "L"): 154, ("W", "M"): 148,
+    ("W", "N"): 41, ("W", "P"): 68, ("W", "Q"): 85, ("W", "R"): 114,
+    ("W", "S"): 38, ("W", "T"): 87, ("W", "W"): 215, ("Y", "A"): 103,
+    ("Y", "C"): 21, ("Y", "D"): 55, ("Y", "E"): 93, ("Y", "F"): 193,
+    ("Y", "G"): 68, ("Y", "H"): 132, ("Y", "I"): 182, ("Y", "K"): 130,
+    ("Y", "L"): 179, ("Y", "M"): 179, ("Y", "N"): 72, ("Y", "P"): 105,
+    ("Y", "Q"): 116, ("Y", "R"): 138, ("Y", "S"): 71, ("Y", "T"): 123,
+    ("Y", "W"): 178, ("Y", "Y"): 215
+}
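+
+
+# Illustrative inverse of the conversion described above (ours, not part
+# of the original data): distance = 215 - similarity.
+def grantham_distance(a, b):
+    """Recover the original Grantham distance from the `grant` matrix."""
+    return 215 - (grant[(a, b)] if (a, b) in grant else grant[(b, a)])
+# e.g. grantham_distance("A", "A") == 0 and grantham_distance("W", "C") == 215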
+
+
+# http://www.embl-heidelberg.de/~vogt/matrices/ident.cmp
+ident = {
+    ("W", "F"): -1, ("L", "R"): -1, ("I", "I"): 6, ("Q", "Q"): 6,
+    ("W", "N"): -1, ("V", "I"): -1, ("H", "T"): -1, ("H", "P"): -1,
+    ("W", "V"): -1, ("Q", "E"): -1, ("W", "R"): -1, ("Q", "A"): -1,
+    ("H", "H"): 6, ("H", "D"): -1, ("L", "N"): -1, ("Y", "M"): -1,
+    ("Y", "I"): -1, ("Y", "E"): -1, ("E", "S"): -1, ("Y", "A"): -1,
+    ("Y", "Y"): 6, ("T", "C"): -1, ("E", "C"): -1, ("Y", "Q"): -1,
+    ("E", "G"): -1, ("V", "A"): -1, ("C", "C"): 6, ("M", "R"): -1,
+    ("P", "T"): -1, ("V", "E"): -1, ("P", "P"): 6, ("I", "T"): -1,
+    ("K", "S"): -1, ("R", "G"): -1, ("I", "P"): -1, ("R", "C"): -1,
+    ("A", "T"): -1, ("K", "K"): 6, ("A", "P"): -1, ("V", "M"): -1,
+    ("I", "D"): -1, ("K", "C"): -1, ("K", "G"): -1, ("R", "S"): -1,
+    ("F", "Q"): -1, ("F", "A"): -1, ("V", "V"): 6, ("M", "N"): -1,
+    ("F", "E"): -1, ("D", "N"): -1, ("F", "I"): -1, ("F", "M"): -1,
+    ("M", "S"): -1, ("S", "S"): 6, ("L", "Q"): -1, ("W", "E"): -1,
+    ("W", "A"): -1, ("W", "M"): -1, ("H", "S"): -1, ("W", "I"): -1,
+    ("S", "C"): -1, ("L", "A"): -1, ("L", "E"): -1, ("W", "Q"): -1,
+    ("H", "G"): -1, ("Q", "N"): -1, ("H", "C"): -1, ("L", "M"): -1,
+    ("W", "Y"): -1, ("Y", "N"): -1, ("E", "P"): -1, ("Y", "F"): -1,
+    ("E", "T"): -1, ("A", "A"): 6, ("I", "N"): -1, ("G", "A"): -1,
+    ("Y", "V"): -1, ("E", "D"): -1, ("W", "H"): -1, ("Y", "R"): -1,
+    ("M", "Q"): -1, ("P", "S"): -1, ("R", "H"): -1, ("A", "C"): -1,
+    ("R", "D"): -1, ("K", "P"): -1, ("L", "D"): -1, ("K", "T"): -1,
+    ("V", "N"): -1, ("M", "A"): -1, ("K", "H"): -1, ("V", "R"): -1,
+    ("P", "C"): -1, ("M", "E"): -1, ("A", "S"): -1, ("T", "T"): 6,
+    ("R", "T"): -1, ("I", "G"): -1, ("R", "P"): -1, ("K", "D"): -1,
+    ("I", "C"): -1, ("F", "R"): -1, ("F", "V"): -1, ("L", "C"): -1,
+    ("F", "F"): 6, ("D", "A"): -1, ("F", "N"): -1, ("W", "D"): -1,
+    ("L", "P"): -1, ("Q", "S"): -1, ("N", "C"): -1, ("N", "G"): -1,
+    ("H", "N"): -1, ("W", "T"): -1, ("Q", "G"): -1, ("W", "P"): -1,
+    ("Q", "C"): -1, ("N", "S"): -1, ("L", "H"): -1, ("L", "L"): 6,
+    ("G", "T"): -1, ("M", "M"): 6, ("G", "P"): -1, ("Y", "K"): -1,
+    ("Y", "G"): -1, ("Y", "C"): -1, ("E", "A"): -1, ("E", "E"): 6,
+    ("Y", "S"): -1, ("M", "P"): -1, ("V", "C"): -1, ("M", "T"): -1,
+    ("V", "G"): -1, ("R", "E"): -1, ("V", "K"): -1, ("K", "Q"): -1,
+    ("R", "A"): -1, ("I", "R"): -1, ("N", "A"): -1, ("V", "S"): -1,
+    ("M", "D"): -1, ("M", "H"): -1, ("K", "A"): -1, ("R", "Q"): -1,
+    ("K", "E"): -1, ("F", "S"): -1, ("I", "K"): -1, ("D", "P"): -1,
+    ("D", "T"): -1, ("I", "M"): -1, ("F", "C"): -1, ("W", "L"): -1,
+    ("F", "G"): -1, ("F", "K"): -1, ("F", "T"): -1, ("D", "D"): 6,
+    ("Q", "T"): -1, ("W", "G"): -1, ("Q", "P"): -1, ("W", "C"): -1,
+    ("W", "K"): -1, ("H", "Q"): -1, ("Q", "D"): -1, ("W", "W"): 6,
+    ("V", "L"): -1, ("L", "G"): -1, ("W", "S"): -1, ("L", "K"): -1,
+    ("N", "P"): -1, ("H", "E"): -1, ("N", "T"): -1, ("H", "A"): -1,
+    ("Y", "L"): -1, ("Y", "H"): -1, ("G", "S"): -1, ("Y", "D"): -1,
+    ("V", "Q"): -1, ("L", "T"): -1, ("G", "G"): 6, ("G", "C"): -1,
+    ("E", "N"): -1, ("Y", "T"): -1, ("Y", "P"): -1, ("R", "N"): -1,
+    ("V", "D"): -1, ("K", "R"): -1, ("V", "H"): -1, ("I", "Q"): -1,
+    ("V", "P"): -1, ("M", "C"): -1, ("K", "N"): -1, ("V", "T"): -1,
+    ("M", "G"): -1, ("T", "S"): -1, ("I", "E"): -1, ("M", "K"): -1,
+    ("I", "A"): -1, ("N", "N"): 6, ("R", "R"): 6, ("F", "P"): -1,
+    ("L", "I"): -1, ("I", "S"): -1, ("D", "S"): -1, ("L", "S"): -1,
+    ("I", "H"): -1, ("F", "D"): -1, ("D", "C"): -1, ("F", "H"): -1,
+    ("D", "G"): -1, ("F", "L"): -1
+}
+
+
+# http://www.embl-heidelberg.de/~vogt/matrices/johnson.cmp
+johnson = {
+    ("W", "F"): 3.4, ("S", "P"): -1.0, ("N", "M"): -3.7, ("Q", "Q"): 9.0,
+    ("N", "A"): -1.4, ("N", "E"): -0.7, ("W", "V"): -4.9, ("Q", "E"): 2.4,
+    ("L", "H"): -4.2, ("W", "R"): -3.8, ("Q", "A"): -0.6, ("S", "D"): -0.2,
+    ("H", "H"): 12.7, ("Q", "M"): -0.6, ("S", "H"): -2.6, ("H", "D"): -0.7,
+    ("Q", "I"): -7.0, ("S", "L"): -5.2, ("Y", "M"): -1.3, ("Y", "I"): -2.5,
+    ("Y", "E"): -3.7, ("Y", "A"): -4.0, ("G", "F"): -8.6, ("V", "T"): -1.9,
+    ("Y", "Y"): 10.5, ("V", "H"): -3.9, ("E", "C"): -6.9, ("Y", "Q"): -5.1,
+    ("V", "A"): -0.5, ("C", "C"): 16.1, ("V", "E"): -4.2, ("T", "N"): 0.1,
+    ("R", "K"): 3.2, ("P", "P"): 10.3, ("V", "I"): 3.9, ("R", "G"): -2.8,
+    ("V", "M"): 0.7, ("T", "F"): -5.0, ("R", "C"): -5.6, ("V", "Q"): -3.6,
+    ("K", "K"): 7.6, ("P", "D"): -1.0, ("I", "H"): -5.1, ("M", "F"): -0.6,
+    ("I", "D"): -4.8, ("K", "C"): -8.7, ("P", "L"): -2.8, ("K", "G"): -3.5,
+    ("P", "H"): -4.3, ("T", "R"): -1.4, ("F", "A"): -3.2, ("F", "E"): -6.4,
+    ("S", "S"): 5.8, ("W", "E"): -7.6, ("N", "N"): 8.0, ("W", "M"): -0.9,
+    ("Q", "C"): -6.9, ("N", "F"): -3.8, ("S", "C"): -7.7, ("L", "A"): -3.3,
+    ("S", "G"): -1.3, ("L", "E"): -5.6, ("W", "Q"): -8.2, ("H", "G"): -3.2,
+    ("S", "K"): -1.5, ("Q", "N"): -0.8, ("V", "D"): -5.2, ("H", "C"): -8.2,
+    ("Y", "N"): -1.3, ("Y", "F"): 3.4, ("W", "I"): -3.3, ("C", "A"): -3.4,
+    ("G", "E"): -2.5, ("G", "A"): -0.5, ("Y", "V"): -1.8, ("E", "D"): 2.4,
+    ("W", "H"): -4.0, ("Y", "R"): -2.1, ("N", "I"): -4.7, ("R", "L"): -3.7,
+    ("T", "I"): -3.2, ("Q", "L"): -4.4, ("R", "H"): 0.1, ("T", "M"): -3.2,
+    ("V", "F"): -1.3, ("R", "D"): -3.4, ("T", "A"): -0.8, ("T", "P"): -2.0,
+    ("T", "E"): -0.5, ("V", "N"): -5.7, ("P", "G"): -2.5, ("M", "A"): -1.5,
+    ("K", "H"): 0.1, ("V", "R"): -4.9, ("P", "C"): -8.9, ("M", "E"): -2.8,
+    ("V", "V"): 7.0, ("T", "T"): 6.8, ("M", "I"): 2.6, ("T", "Q"): -0.4,
+    ("I", "G"): -5.5, ("P", "K"): -0.6, ("M", "M"): 11.2, ("K", "D"): -1.5,
+    ("I", "C"): -7.7, ("L", "C"): -8.7, ("F", "F"): 10.4, ("D", "A"): -1.6,
+    ("S", "R"): -0.6, ("W", "D"): -6.0, ("N", "C"): -7.6, ("N", "G"): -1.4,
+    ("W", "T"): -9.3, ("Q", "G"): -2.8, ("S", "F"): -4.8, ("W", "P"): -7.4,
+    ("L", "D"): -8.0, ("H", "F"): -1.7, ("Q", "K"): 1.1, ("S", "N"): 1.0,
+    ("L", "L"): 7.3, ("Q", "F"): -6.4, ("Y", "K"): -3.7, ("Y", "G"): -5.4,
+    ("Y", "C"): -7.7, ("G", "D"): -2.1, ("E", "A"): -0.7, ("Y", "W"): 2.3,
+    ("E", "E"): 8.6, ("Y", "S"): -3.4, ("R", "M"): -4.2, ("V", "C"): -4.8,
+    ("T", "H"): -3.0, ("R", "I"): -5.4, ("V", "G"): -5.6, ("T", "L"): -4.6,
+    ("R", "E"): -0.2, ("V", "K"): -3.7, ("R", "Q"): 2.1, ("R", "A"): -1.6,
+    ("T", "D"): -1.8, ("P", "F"): -5.0, ("V", "S"): -4.3, ("K", "I"): -4.7,
+    ("M", "D"): -5.9, ("W", "W"): 15.2, ("M", "H"): -2.3, ("P", "N"): -2.4,
+    ("I", "F"): 0.5, ("K", "A"): -0.9, ("M", "L"): 4.4, ("K", "E"): 1.1,
+    ("N", "K"): 0.1, ("R", "P"): -3.6, ("L", "F"): 1.8, ("F", "C"): -4.4,
+    ("W", "G"): -6.3, ("W", "L"): -1.0, ("D", "D"): 8.5, ("N", "H"): 1.7,
+    ("S", "Q"): -1.2, ("Q", "P"): -3.6, ("N", "L"): -4.8, ("W", "K"): -5.4,
+    ("Q", "D"): -1.1, ("W", "N"): -6.1, ("S", "A"): 0.0, ("L", "G"): -7.2,
+    ("W", "S"): -6.2, ("S", "E"): -2.2, ("L", "K"): -3.4, ("H", "E"): -2.3,
+    ("S", "I"): -4.7, ("Q", "H"): 1.4, ("H", "A"): -3.1, ("S", "M"): -4.8,
+    ("Y", "L"): -2.4, ("Y", "H"): -0.4, ("Y", "D"): -3.8, ("G", "G"): 8.0,
+    ("G", "C"): -8.2, ("Y", "T"): -2.7, ("W", "C"): -9.1, ("Y", "P"): -7.0,
+    ("T", "K"): -0.2, ("R", "N"): -1.5, ("A", "A"): 6.0, ("W", "A"): -5.8,
+    ("T", "C"): -6.0, ("N", "D"): 2.6, ("R", "F"): -6.0, ("T", "G"): -3.8,
+    ("V", "L"): 1.8, ("V", "P"): -5.2, ("P", "E"): -1.5, ("M", "C"): -4.4,
+    ("I", "I"): 8.1, ("P", "A"): -1.0, ("M", "G"): -5.2, ("T", "S"): 2.0,
+    ("I", "E"): -4.8, ("P", "M"): -9.8, ("M", "K"): -1.9, ("K", "F"): -5.6,
+    ("I", "A"): -2.2, ("P", "I"): -5.7, ("R", "R"): 10.0, ("L", "I"): 2.6,
+    ("F", "D"): -7.0, ("D", "C"): -9.7
+}
+
+
+# http://www.embl-heidelberg.de/~vogt/matrices/levin.cmp
+levin = {
+    ("W", "F"): 0, ("L", "R"): -1, ("S", "P"): 0, ("I", "I"): 2,
+    ("Q", "Q"): 2, ("N", "A"): 0, ("H", "T"): 0, ("N", "E"): 0,
+    ("H", "P"): 0, ("W", "V"): 0, ("Q", "E"): 1, ("W", "R"): 0,
+    ("Q", "A"): 0, ("S", "D"): 0, ("H", "H"): 2, ("H", "D"): 0,
+    ("L", "N"): -1, ("W", "A"): -1, ("Y", "M"): 0, ("Y", "I"): 0,
+    ("Y", "E"): -1, ("Y", "A"): -1, ("Y", "Y"): 2, ("Y", "Q"): -1,
+    ("E", "G"): 0, ("A", "D"): 0, ("C", "C"): 2, ("M", "R"): -1,
+    ("V", "E"): -1, ("T", "N"): 0, ("R", "K"): 1, ("P", "P"): 3,
+    ("I", "T"): 0, ("K", "S"): 0, ("R", "G"): 0, ("I", "P"): -1,
+    ("C", "G"): 0, ("C", "S"): 0, ("A", "P"): -1, ("I", "D"): -1,
+    ("M", "I"): 0, ("K", "G"): 0, ("M", "N"): -1, ("F", "Q"): -1,
+    ("I", "V"): 1, ("F", "A"): -1, ("V", "V"): 2, ("F", "E"): -1,
+    ("C", "M"): 0, ("F", "I"): 1, ("F", "M"): 0, ("S", "S"): 2,
+    ("L", "Q"): -1, ("W", "E"): -1, ("N", "N"): 3, ("V", "A"): 0,
+    ("C", "K"): 0, ("W", "M"): 0, ("H", "S"): 0, ("L", "V"): 1,
+    ("L", "A"): 0, ("H", "K"): 0, ("S", "G"): 0, ("L", "E"): -1,
+    ("W", "Q"): -1, ("H", "G"): 0, ("Q", "N"): 1, ("T", "A"): 0,
+    ("L", "M"): 2, ("W", "Y"): 0, ("Y", "N"): -1, ("E", "P"): -1,
+    ("Y", "F"): 1, ("W", "I"): 0, ("R", "S"): 0, ("Y", "V"): 0,
+    ("E", "D"): 1, ("W", "H"): -1, ("Y", "R"): -1, ("M", "Q"): -1,
+    ("A", "G"): 0, ("C", "D"): 0, ("R", "D"): 0, ("C", "H"): 0,
+    ("T", "P"): 0, ("K", "T"): 0, ("V", "N"): -1, ("P", "G"): 0,
+    ("M", "A"): 0, ("C", "P"): 0, ("V", "R"): -1, ("M", "V"): 0,
+    ("M", "E"): -1, ("C", "T"): 0, ("I", "K"): -1, ("R", "T"): 0,
+    ("T", "Q"): 0, ("I", "G"): -1, ("R", "P"): 0, ("K", "D"): 0,
+    ("F", "R"): -1, ("F", "V"): 0, ("K", "P"): 0, ("L", "C"): 0,
+    ("F", "F"): 2, ("F", "N"): -1, ("V", "D"): -1, ("L", "P"): -1,
+    ("W", "K"): -1, ("L", "T"): 0, ("I", "N"): -1, ("I", "S"): -1,
+    ("H", "R"): 0, ("N", "G"): 0, ("C", "I"): 0, ("H", "N"): 0,
+    ("W", "T"): -1, ("Q", "G"): 0, ("W", "P"): -1, ("L", "D"): -1,
+    ("L", "H"): -1, ("S", "N"): 0, ("L", "L"): 2, ("M", "M"): 2,
+    ("Y", "K"): -1, ("Y", "G"): -1, ("Y", "C"): -1, ("E", "E"): 2,
+    ("Y", "S"): -1, ("M", "P"): -1, ("C", "A"): 0, ("M", "T"): 0,
+    ("V", "G"): -1, ("C", "E"): 0, ("R", "E"): 0, ("V", "K"): -1,
+    ("K", "Q"): 0, ("R", "A"): 0, ("I", "R"): -1, ("T", "D"): 0,
+    ("V", "S"): -1, ("C", "Q"): 0, ("M", "D"): -1, ("W", "W"): 2,
+    ("M", "H"): -1, ("T", "G"): 0, ("K", "A"): 0, ("R", "Q"): 0,
+    ("T", "T"): 2, ("F", "S"): -1, ("D", "P"): 0, ("F", "C"): -1,
+    ("W", "L"): 0, ("F", "G"): -1, ("F", "K"): -1, ("D", "D"): 2,
+    ("L", "S"): -1, ("W", "G"): -1, ("S", "Q"): 0, ("Q", "P"): 0,
+    ("W", "C"): -1, ("N", "D"): 1, ("H", "Q"): 0, ("Q", "D"): 0,
+    ("W", "N"): -1, ("S", "A"): 1, ("L", "G"): -1, ("W", "S"): -1,
+    ("S", "E"): 0, ("L", "K"): -1, ("N", "P"): 0, ("H", "E"): 0,
+    ("H", "A"): 0, ("Y", "L"): 0, ("Y", "H"): 0, ("Y", "D"): -1,
+    ("V", "Q"): -1, ("G", "G"): 2, ("Y", "T"): -1, ("R", "N"): 0,
+    ("Y", "P"): -1, ("A", "E"): 1, ("C", "V"): 0, ("M", "S"): -1,
+    ("A", "A"): 2, ("V", "H"): -1, ("T", "E"): 0, ("C", "N"): 0,
+    ("I", "Q"): -1, ("C", "R"): 0, ("V", "P"): -1, ("K", "E"): 0,
+    ("K", "N"): 1, ("V", "T"): 0, ("M", "G"): -1, ("T", "S"): 0,
+    ("I", "E"): -1, ("M", "K"): -1, ("I", "A"): 0, ("R", "R"): 2,
+    ("F", "P"): -1, ("L", "I"): 0, ("W", "D"): -1, ("F", "T"): -1,
+    ("K", "K"): 2, ("I", "H"): -1, ("F", "D"): -1, ("F", "H"): -1,
+    ("D", "G"): 0, ("F", "L"): 0
+}
+
+
+# http://www.embl-heidelberg.de/~vogt/matrices/mclach.cmp
+mclach = {
+    ("N", "I"): 1, ("K", "V"): 2, ("S", "P"): 3, ("N", "M"): 2,
+    ("L", "V"): 5, ("N", "A"): 3, ("H", "T"): 4, ("N", "E"): 4,
+    ("Q", "Y"): 1, ("H", "P"): 3, ("W", "V"): 2, ("H", "L"): 2,
+    ("Q", "A"): 3, ("H", "H"): 8, ("N", "Q"): 4, ("Q", "M"): 3,
+    ("Q", "I"): 0, ("S", "L"): 2, ("G", "V"): 2, ("Y", "M"): 2,
+    ("K", "L"): 2, ("Y", "I"): 3, ("E", "S"): 4, ("K", "A"): 3,
+    ("E", "W"): 1, ("G", "F"): 0, ("E", "K"): 4, ("Y", "Y"): 9,
+    ("K", "F"): 0, ("E", "G"): 3, ("C", "C"): 9, ("C", "G"): 1,
+    ("M", "V"): 4, ("P", "P"): 8, ("A", "L"): 2, ("K", "S"): 3,
+    ("R", "G"): 3, ("K", "W"): 1, ("R", "C"): 1, ("I", "L"): 5,
+    ("C", "S"): 2, ("C", "W"): 2, ("K", "C"): 0, ("R", "W"): 3,
+    ("P", "L"): 1, ("K", "G"): 3, ("R", "S"): 4, ("D", "R"): 1,
+    ("D", "V"): 1, ("D", "N"): 5, ("F", "I"): 3, ("F", "M"): 5,
+    ("D", "F"): 1, ("S", "S"): 8, ("Q", "V"): 2, ("S", "W"): 3,
+    ("Q", "R"): 5, ("N", "N"): 8, ("H", "W"): 3, ("W", "M"): 1,
+    ("H", "S"): 3, ("W", "I"): 3, ("T", "F"): 1, ("Q", "F"): 0,
+    ("S", "G"): 3, ("H", "G"): 2, ("C", "P"): 0, ("N", "R"): 3,
+    ("H", "C"): 3, ("N", "V"): 1, ("E", "P"): 4, ("Y", "F"): 6,
+    ("E", "T"): 4, ("G", "Y"): 0, ("E", "H"): 2, ("E", "L"): 1,
+    ("Y", "V"): 3, ("G", "M"): 1, ("G", "I"): 1, ("P", "W"): 0,
+    ("R", "L"): 2, ("T", "I"): 3, ("A", "G"): 3, ("R", "H"): 5,
+    ("T", "M"): 3, ("T", "A"): 3, ("K", "T"): 3, ("P", "G"): 3,
+    ("T", "Y"): 1, ("A", "W"): 1, ("C", "T"): 2, ("V", "V"): 8,
+    ("R", "T"): 3, ("R", "P"): 3, ("D", "Y"): 1, ("F", "V"): 3,
+    ("D", "Q"): 4, ("K", "P"): 3, ("D", "I"): 1, ("D", "M"): 2,
+    ("F", "F"): 9, ("D", "A"): 3, ("D", "E"): 5, ("N", "K"): 4,
+    ("Q", "W"): 2, ("S", "V"): 2, ("Q", "S"): 4, ("H", "V"): 2,
+    ("W", "L"): 3, ("N", "G"): 3, ("Q", "G"): 2, ("S", "F"): 2,
+    ("Q", "C"): 0, ("H", "F"): 4, ("N", "S"): 5, ("Q", "K"): 4,
+    ("N", "W"): 0, ("L", "L"): 8, ("E", "Y"): 2, ("M", "M"): 8,
+    ("E", "Q"): 5, ("E", "I"): 1, ("E", "M"): 1, ("E", "A"): 4,
+    ("G", "L"): 1, ("Y", "W"): 6, ("E", "E"): 8, ("R", "M"): 1,
+    ("P", "V"): 2, ("A", "F"): 1, ("C", "A"): 1, ("R", "I"): 1,
+    ("T", "L"): 3, ("I", "V"): 5, ("C", "I"): 1, ("R", "A"): 2,
+    ("C", "Y"): 1, ("C", "M"): 3, ("P", "F"): 1, ("A", "V"): 3,
+    ("K", "I"): 1, ("R", "Y"): 2, ("K", "M"): 1, ("K", "H"): 4,
+    ("T", "P"): 3, ("M", "L"): 6, ("T", "T"): 8, ("C", "L"): 0,
+    ("D", "P"): 3, ("N", "F"): 0, ("K", "Y"): 1, ("D", "T"): 3,
+    ("D", "H"): 4, ("D", "L"): 1, ("K", "K"): 8, ("D", "D"): 8,
+    ("Q", "T"): 3, ("N", "H"): 4, ("Q", "P"): 3, ("N", "L"): 1,
+    ("H", "Y"): 4, ("S", "Y"): 3, ("W", "W"): 9, ("H", "M"): 3,
+    ("S", "A"): 4, ("H", "I"): 2, ("Q", "L"): 3, ("N", "P"): 1,
+    ("S", "I"): 2, ("Q", "H"): 4, ("N", "T"): 3, ("H", "A"): 3,
+    ("S", "M"): 2, ("Y", "L"): 3, ("G", "W"): 1, ("E", "R"): 3,
+    ("E", "V"): 2, ("G", "G"): 8, ("T", "V"): 3, ("E", "F"): 0,
+    ("C", "F"): 0, ("A", "A"): 8, ("K", "R"): 5, ("A", "M"): 3,
+    ("Q", "Q"): 8, ("R", "F"): 1, ("T", "G"): 2, ("A", "I"): 2,
+    ("P", "Y"): 0, ("C", "V"): 1, ("I", "I"): 8, ("P", "A"): 4,
+    ("T", "S"): 5, ("P", "M"): 1, ("R", "V"): 2, ("T", "W"): 2,
+    ("A", "Y"): 1, ("P", "I"): 1, ("R", "R"): 8, ("N", "Y"): 2,
+    ("D", "S"): 3, ("D", "W"): 0, ("M", "I"): 5, ("D", "K"): 3,
+    ("N", "C"): 1, ("E", "C"): 0, ("D", "C"): 1, ("D", "G"): 3,
+    ("F", "L"): 5, ("W", "F"): 6
+}
+
+
+# http://www.embl-heidelberg.de/~vogt/matrices/miyata.cmp
+# This similarity matrix was converted from the original Miyata
+# physicochemical distance matrix (PMID: 439147) via the formula:
+# similarity = 1.25 - distance.
+# The offset 1.25 was chosen by Dr. Gerhard Vogt; treat it with caution,
+# as there is no known solid reason to use this particular value.
+# A conversion sketch follows the matrix below.
+miyata = {
+    ("W", "F"): 0.14, ("L", "R"): -1.37, ("S", "P"): 0.69, ("I", "I"): 1.25,
+    ("Q", "Q"): 1.25, ("N", "A"): -0.53, ("H", "T"): -0.07, ("N", "E"): 0.4,
+    ("H", "P"): -0.9, ("W", "V"): -1.26, ("W", "R"): -1.47, ("Q", "A"): -0.67,
+    ("H", "H"): 1.25, ("N", "Q"): 0.26, ("H", "D"): -0.47, ("L", "N"): -2.24,
+    ("Y", "M"): 0.32, ("Y", "I"): 0.39, ("Y", "E"): -1.97, ("E", "S"): -0.81,
+    ("Y", "A"): -1.93, ("Y", "Y"): 1.25, ("E", "C"): -2.01, ("Y", "Q"): -1.23,
+    ("E", "G"): -1.53, ("V", "A"): -0.6, ("C", "C"): 1.25, ("M", "R"): -1.04,
+    ("V", "E"): -1.72, ("R", "K"): 0.85, ("P", "P"): 1.25, ("I", "T"): -0.89,
+    ("K", "S"): -1.46, ("R", "G"): -2.33, ("I", "P"): -1.37, ("R", "C"): -1.81,
+    ("V", "Q"): -0.88, ("K", "K"): 1.25, ("A", "P"): 1.19, ("I", "D"): -2.73,
+    ("K", "C"): -2.02, ("M", "I"): 0.96, ("K", "G"): -2.29, ("R", "S"): -1.49,
+    ("F", "Q"): -1.56, ("I", "V"): 0.4, ("M", "V"): 0.63, ("F", "A"): -1.98,
+    ("V", "V"): 1.25, ("M", "N"): -1.83, ("F", "E"): -2.34, ("D", "N"): 0.6,
+    ("F", "I"): 0.64, ("F", "M"): 0.43, ("M", "S"): -1.42, ("S", "S"): 1.25,
+    ("L", "Q"): -1.45, ("W", "E"): -2.83, ("W", "A"): -2.98, ("W", "M"): -0.64,
+    ("H", "S"): -0.69, ("L", "V"): 0.34, ("S", "C"): -0.59, ("L", "A"): -1.51,
+    ("S", "G"): 0.4, ("L", "E"): -2.28, ("W", "Q"): -2.17, ("H", "G"): -1.53,
+    ("H", "C"): -1.31, ("W", "Y"): 0.19, ("Y", "N"): -2.17, ("E", "P"): -1.23,
+    ("I", "L"): 1.11, ("E", "T"): -0.58, ("W", "I"): -0.47, ("A", "A"): 1.25,
+    ("I", "N"): -2.12, ("G", "A"): 0.34, ("Y", "V"): -0.27, ("W", "H"): -1.91,
+    ("Y", "R"): -0.77, ("M", "Q"): -1.05, ("R", "H"): 0.43, ("A", "C"): -0.14,
+    ("R", "D"): -1.09, ("T", "A"): 0.35, ("T", "P"): 0.38, ("L", "D"): -2.85,
+    ("K", "T"): -0.85, ("V", "N"): -1.51, ("M", "A"): -1.17, ("K", "H"): 0.46,
+    ("V", "R"): -1.18, ("P", "C"): -0.08, ("M", "E"): -1.88, ("I", "K"): -1.59,
+    ("T", "T"): 1.25, ("R", "T"): -0.78, ("I", "G"): -2.35, ("R", "P"): -1.65,
+    ("K", "D"): -0.8, ("I", "C"): -0.38, ("F", "R"): -1.22, ("F", "V"): -0.18,
+    ("D", "Q"): -0.22, ("K", "P"): -1.69, ("F", "F"): 1.25, ("D", "A"): -1.12,
+    ("D", "E"): 0.35, ("F", "N"): -2.45, ("W", "D"): -3.63, ("L", "P"): -1.45,
+    ("Q", "S"): -0.4, ("N", "C"): -1.58, ("N", "G"): -0.71, ("H", "N"): -0.04,
+    ("W", "T"): -2.25, ("Q", "G"): -1.23, ("W", "P"): -2.92, ("Q", "C"): -1.23,
+    ("N", "S"): -0.06, ("L", "H"): -1.34, ("L", "L"): 1.25, ("M", "M"): 1.25,
+    ("G", "P"): 0.28, ("Y", "K"): -1.17, ("E", "Q"): 0.41, ("Y", "G"): -2.83,
+    ("Y", "C"): -1.13, ("E", "A"): -1.21, ("E", "E"): 1.25, ("Y", "S"): -2.08,
+    ("M", "P"): -1.11, ("V", "C"): 0.39, ("M", "T"): -0.61, ("V", "G"): -1.51,
+    ("R", "E"): -0.2, ("V", "K"): -1.45, ("K", "Q"): 0.19, ("R", "A"): -1.67,
+    ("I", "R"): -1.24, ("V", "S"): -0.9, ("M", "L"): 0.84, ("M", "D"): -2.44,
+    ("W", "W"): 1.25, ("M", "H"): -0.94, ("K", "A"): -1.71, ("R", "Q"): 0.12,
+    ("K", "E"): 0.11, ("F", "S"): -2.2, ("D", "P"): -1.15, ("D", "T"): -0.8,
+    ("F", "C"): -0.99, ("W", "L"): -0.48, ("F", "G"): -2.89, ("F", "K"): -1.6,
+    ("F", "T"): -1.35, ("D", "D"): 1.25, ("Q", "T"): 0.13, ("W", "G"): -3.88,
+    ("Q", "P"): -0.67, ("W", "C"): -2.09, ("W", "K"): -1.86, ("H", "Q"): 0.93,
+    ("L", "C"): -0.4, ("W", "N"): -3.14, ("S", "A"): 0.74, ("L", "G"): -2.42,
+    ("W", "S"): -3.13, ("L", "K"): -1.73, ("N", "P"): -0.55, ("H", "E"): 0.29,
+    ("N", "T"): -0.15, ("H", "A"): -0.92, ("Y", "L"): 0.31, ("Y", "H"): -1.02,
+    ("Y", "D"): -2.7, ("L", "T"): -1.0, ("G", "G"): 1.25, ("G", "C"): -0.97,
+    ("Y", "T"): -1.2, ("Y", "P"): -1.87, ("R", "N"): -0.79, ("V", "D"): -2.15,
+    ("T", "C"): -0.2, ("V", "H"): -0.86, ("T", "G"): -0.45, ("I", "Q"): -1.32,
+    ("V", "P"): -0.54, ("M", "C"): -0.21, ("K", "N"): -0.59, ("V", "T"): -0.17,
+    ("M", "G"): -2.09, ("T", "S"): 0.36, ("I", "E"): -2.14, ("M", "K"): -1.38,
+    ("I", "A"): -1.44, ("N", "N"): 1.25, ("R", "R"): 1.25, ("F", "P"): -1.92,
+    ("I", "S"): -1.7, ("D", "S"): -0.62, ("Y", "F"): 0.77, ("L", "S"): -1.79,
+    ("I", "H"): -1.2, ("F", "D"): -3.02, ("D", "C"): -2.23, ("F", "H"): -1.38,
+    ("D", "G"): -1.12, ("F", "L"): 0.62
+}
+
+
+# http://www.embl-heidelberg.de/~vogt/matrices/nwsgappep.cmp
+nwsgappep = {
+    ("W", "F"): 1.3, ("S", "P"): 0.4, ("W", "B"): -0.7, ("N", "N"): 1.5,
+    ("N", "A"): 0.2, ("N", "E"): 0.5, ("Z", "Y"): -0.6, ("W", "V"): -0.8,
+    ("L", "B"): -0.5, ("W", "R"): 1.4, ("Q", "A"): 0.2, ("S", "D"): 0.2,
+    ("H", "H"): 1.5, ("Q", "M"): 0.0, ("S", "H"): -0.2, ("H", "D"): 0.4,
+    ("Q", "I"): -0.3, ("S", "L"): -0.4, ("Y", "M"): -0.1, ("Q", "E"): 0.7,
+    ("Y", "I"): 0.1, ("Y", "E"): -0.5, ("Y", "A"): -0.3, ("G", "F"): -0.6,
+    ("V", "T"): 0.2, ("G", "B"): 0.6, ("Y", "Y"): 1.5, ("N", "L"): -0.4,
+    ("E", "C"): -0.6, ("Y", "Q"): -0.6, ("Z", "Z"): 1.1, ("V", "A"): 0.2,
+    ("C", "C"): 1.5, ("V", "E"): -0.2, ("T", "N"): 0.2, ("R", "K"): 0.8,
+    ("P", "P"): 1.5, ("V", "I"): 1.1, ("T", "B"): 0.2, ("R", "G"): -0.3,
+    ("V", "M"): 0.6, ("T", "F"): -0.3, ("R", "C"): -0.3, ("V", "Q"): -0.2,
+    ("K", "K"): 1.5, ("M", "B"): -0.3, ("P", "D"): 0.1, ("I", "H"): -0.3,
+    ("M", "F"): 0.5, ("I", "D"): -0.2, ("K", "C"): -0.6, ("L", "L"): 1.5,
+    ("K", "G"): -0.1, ("P", "H"): 0.2, ("Z", "G"): 0.3, ("W", "M"): -0.3,
+    ("Z", "C"): -0.6, ("T", "R"): -0.1, ("Z", "K"): 0.4, ("F", "A"): -0.5,
+    ("Z", "W"): -0.8, ("F", "E"): -0.7, ("Z", "S"): 0.0, ("D", "B"): 1.1,
+    ("S", "S"): 1.5, ("W", "E"): -1.1, ("W", "A"): -0.8, ("N", "B"): 1.1,
+    ("Q", "C"): -0.6, ("Z", "Q"): 1.1, ("N", "F"): -0.5, ("S", "C"): 0.7,
+    ("Q", "F"): -0.8, ("S", "G"): 0.6, ("Q", "B"): 0.5, ("W", "Q"): -0.5,
+    ("H", "G"): -0.2, ("S", "K"): 0.2, ("L", "I"): 0.8, ("V", "D"): -0.2,
+    ("H", "C"): -0.1, ("L", "E"): -0.3, ("Y", "N"): -0.1, ("Y", "F"): 1.4,
+    ("W", "I"): -0.5, ("C", "A"): 0.3, ("G", "E"): 0.5, ("G", "A"): 0.7,
+    ("Y", "V"): -0.1, ("E", "D"): 1.0, ("W", "H"): -0.1, ("Y", "R"): -0.6,
+    ("N", "I"): -0.3, ("R", "L"): -0.4, ("T", "I"): 0.2, ("V", "B"): -0.2,
+    ("R", "H"): 0.5, ("T", "M"): 0.0, ("V", "F"): 0.2, ("R", "D"): 0.0,
+    ("T", "A"): 0.4, ("T", "P"): 0.3, ("T", "E"): 0.2, ("V", "N"): -0.3,
+    ("P", "G"): 0.3, ("M", "A"): 0.0, ("K", "H"): 0.1, ("V", "R"): -0.3,
+    ("P", "C"): 0.1, ("M", "E"): -0.2, ("V", "V"): 1.5, ("T", "T"): 1.5,
+    ("M", "I"): 0.6, ("T", "Q"): -0.1, ("I", "G"): -0.3, ("P", "K"): 0.1,
+    ("M", "M"): 1.5, ("K", "D"): 0.3, ("I", "C"): 0.2, ("Z", "D"): 0.9,
+    ("Y", "W"): 1.1, ("Z", "L"): -0.2, ("P", "L"): -0.3, ("Z", "I"): -0.2,
+    ("Z", "T"): 0.1, ("L", "C"): -0.8, ("F", "B"): -0.7, ("Z", "P"): 0.2,
+    ("F", "F"): 1.5, ("D", "A"): 0.3, ("S", "R"): 0.1, ("W", "D"): -1.1,
+    ("R", "R"): 1.5, ("W", "K"): 0.1, ("N", "M"): -0.3, ("N", "C"): -0.3,
+    ("N", "G"): 0.4, ("S", "B"): 0.3, ("W", "T"): -0.6, ("Q", "G"): 0.2,
+    ("S", "F"): -0.3, ("W", "P"): -0.8, ("L", "D"): -0.5, ("H", "F"): -0.1,
+    ("L", "H"): -0.2, ("S", "N"): 0.3, ("H", "B"): 0.4, ("Q", "K"): 0.4,
+    ("R", "P"): 0.3, ("Y", "K"): -0.6, ("Y", "B"): -0.3, ("Y", "G"): -0.7,
+    ("Y", "C"): 1.0, ("G", "D"): 0.7, ("E", "A"): 0.3, ("T", "S"): 0.3,
+    ("E", "E"): 1.5, ("Y", "S"): -0.4, ("R", "M"): 0.2, ("V", "C"): 0.2,
+    ("T", "H"): -0.1, ("R", "I"): -0.3, ("V", "S"): -0.1, ("V", "G"): 0.2,
+    ("T", "L"): -0.1, ("R", "E"): 0.0, ("V", "K"): -0.2, ("R", "Q"): 0.4,
+    ("R", "A"): -0.3, ("Z", "H"): 0.5, ("T", "D"): 0.2, ("P", "F"): -0.7,
+    ("L", "A"): -0.1, ("K", "I"): -0.2, ("M", "D"): -0.4, ("P", "B"): 0.1,
+    ("W", "W"): 1.5, ("M", "H"): -0.3, ("P", "N"): 0.0, ("I", "F"): 0.7,
+    ("K", "A"): 0.0, ("M", "L"): 1.3, ("I", "B"): -0.2, ("K", "E"): 0.3,
+    ("Z", "E"): 1.1, ("Q", "N"): 0.4, ("Z", "A"): 0.2, ("Z", "M"): -0.1,
+    ("L", "F"): 1.2, ("F", "C"): -0.1, ("W", "G"): -1.0, ("W", "L"): 0.5,
+    ("C", "B"): -0.4, ("B", "A"): 0.2, ("D", "D"): 1.5, ("N", "H"): 0.5,
+    ("S", "Q"): -0.1, ("Q", "P"): 0.3, ("W", "C"): -1.2, ("N", "D"): 0.7,
+    ("Q", "D"): 0.7, ("W", "N"): -0.3, ("S", "A"): 0.4, ("L", "G"): -0.5,
+    ("W", "S"): 0.3, ("S", "E"): 0.2, ("L", "K"): -0.3, ("H", "E"): 0.4,
+    ("S", "I"): -0.1, ("Q", "H"): 0.7, ("H", "A"): -0.1, ("S", "M"): -0.3,
+    ("Y", "L"): 0.3, ("Y", "H"): 0.3, ("Y", "D"): -0.5, ("G", "G"): 1.5,
+    ("G", "C"): 0.2, ("Y", "T"): -0.3, ("E", "B"): 0.7, ("Y", "P"): -0.8,
+    ("T", "K"): 0.2, ("R", "N"): 0.1, ("A", "A"): 1.5, ("N", "K"): 0.4,
+    ("T", "C"): 0.2, ("V", "H"): -0.3, ("Q", "Q"): 1.5, ("R", "F"): -0.5,
+    ("T", "G"): 0.4, ("V", "L"): 0.8, ("R", "B"): 0.1, ("V", "P"): 0.1,
+    ("P", "E"): 0.1, ("M", "C"): -0.6, ("I", "I"): 1.5, ("P", "A"): 0.5,
+    ("M", "G"): -0.3, ("K", "B"): 0.4, ("I", "E"): -0.2, ("P", "M"): -0.2,
+    ("M", "K"): 0.2, ("K", "F"): -0.7, ("I", "A"): 0.0, ("P", "I"): -0.2,
+    ("Q", "L"): -0.1, ("Z", "F"): -0.7, ("Z", "B"): 0.6, ("Z", "N"): 0.4,
+    ("Z", "V"): -0.2, ("F", "D"): -1.0, ("Z", "R"): 0.2, ("D", "C"): -0.5,
+    ("B", "B"): 1.1
+}
+
+
+# http://www.embl-heidelberg.de/~vogt/matrices/pam120.cmp
+pam120 = {
+    ("W", "F"): -1, ("L", "R"): -4, ("S", "P"): 1, ("V", "T"): 0,
+    ("Q", "Q"): 6, ("N", "A"): -1, ("Z", "Y"): -5, ("W", "R"): 1,
+    ("Q", "A"): -1, ("S", "D"): 0, ("H", "H"): 7, ("S", "H"): -2,
+    ("H", "D"): 0, ("L", "N"): -4, ("W", "A"): -7, ("Y", "M"): -4,
+    ("G", "R"): -4, ("Y", "I"): -2, ("Y", "E"): -5, ("B", "Y"): -3,
+    ("Y", "A"): -4, ("V", "D"): -3, ("B", "S"): 0, ("Y", "Y"): 8,
+    ("G", "N"): 0, ("E", "C"): -7, ("Y", "Q"): -5, ("Z", "Z"): 4,
+    ("V", "A"): 0, ("C", "C"): 9, ("M", "R"): -1, ("V", "E"): -3,
+    ("T", "N"): 0, ("P", "P"): 6, ("V", "I"): 3, ("V", "S"): -2,
+    ("Z", "P"): -1, ("V", "M"): 1, ("T", "F"): -4, ("V", "Q"): -3,
+    ("K", "K"): 5, ("P", "D"): -3, ("I", "H"): -4, ("I", "D"): -3,
+    ("T", "R"): -2, ("P", "L"): -3, ("K", "G"): -3, ("M", "N"): -3,
+    ("P", "H"): -1, ("F", "Q"): -6, ("Z", "G"): -2, ("X", "L"): -2,
+    ("T", "M"): -1, ("Z", "C"): -7, ("X", "H"): -2, ("D", "R"): -3,
+    ("B", "W"): -6, ("X", "D"): -2, ("Z", "K"): -1, ("F", "A"): -4,
+    ("Z", "W"): -7, ("F", "E"): -7, ("D", "N"): 2, ("B", "K"): 0,
+    ("X", "X"): -2, ("F", "I"): 0, ("B", "G"): 0, ("X", "T"): -1,
+    ("F", "M"): -1, ("B", "C"): -6, ("Z", "I"): -3, ("Z", "V"): -3,
+    ("S", "S"): 3, ("L", "Q"): -2, ("W", "E"): -8, ("Q", "R"): 1,
+    ("N", "N"): 4, ("W", "M"): -6, ("Q", "C"): -7, ("W", "I"): -6,
+    ("S", "C"): 0, ("L", "A"): -3, ("S", "G"): 1, ("L", "E"): -4,
+    ("W", "Q"): -6, ("H", "G"): -4, ("S", "K"): -1, ("Q", "N"): 0,
+    ("N", "R"): -1, ("H", "C"): -4, ("Y", "N"): -2, ("G", "Q"): -3,
+    ("Y", "F"): 4, ("C", "A"): -3, ("V", "L"): 1, ("G", "E"): -1,
+    ("G", "A"): 1, ("K", "R"): 2, ("E", "D"): 3, ("Y", "R"): -5,
+    ("M", "Q"): -1, ("T", "I"): 0, ("C", "D"): -7, ("V", "F"): -3,
+    ("T", "A"): 1, ("T", "P"): -1, ("B", "P"): -2, ("T", "E"): -2,
+    ("V", "N"): -3, ("P", "G"): -2, ("M", "A"): -2, ("K", "H"): -2,
+    ("V", "R"): -3, ("P", "C"): -4, ("M", "E"): -3, ("K", "L"): -4,
+    ("V", "V"): 5, ("M", "I"): 1, ("T", "Q"): -2, ("I", "G"): -4,
+    ("P", "K"): -2, ("M", "M"): 8, ("K", "D"): -1, ("I", "C"): -3,
+    ("Z", "D"): 3, ("F", "R"): -5, ("X", "K"): -2, ("Q", "D"): 1,
+    ("X", "G"): -2, ("Z", "L"): -3, ("X", "C"): -4, ("Z", "H"): 1,
+    ("B", "L"): -4, ("B", "H"): 1, ("F", "F"): 8, ("X", "W"): -5,
+    ("B", "D"): 4, ("D", "A"): 0, ("S", "L"): -4, ("X", "S"): -1,
+    ("F", "N"): -4, ("S", "R"): -1, ("W", "D"): -8, ("V", "Y"): -3,
+    ("W", "L"): -3, ("H", "R"): 1, ("W", "H"): -3, ("H", "N"): 2,
+    ("W", "T"): -6, ("T", "T"): 4, ("S", "F"): -3, ("W", "P"): -7,
+    ("L", "D"): -5, ("B", "I"): -3, ("L", "H"): -3, ("S", "N"): 1,
+    ("B", "T"): 0, ("L", "L"): 5, ("Y", "K"): -5, ("E", "Q"): 2,
+    ("Y", "G"): -6, ("Z", "S"): -1, ("Y", "C"): -1, ("G", "D"): 0,
+    ("B", "V"): -3, ("E", "A"): 0, ("Y", "W"): -2, ("E", "E"): 5,
+    ("Y", "S"): -3, ("C", "N"): -5, ("V", "C"): -3, ("T", "H"): -3,
+    ("P", "R"): -1, ("V", "G"): -2, ("T", "L"): -3, ("V", "K"): -4,
+    ("K", "Q"): 0, ("R", "A"): -3, ("I", "R"): -2, ("T", "D"): -1,
+    ("P", "F"): -5, ("I", "N"): -2, ("K", "I"): -3, ("M", "D"): -4,
+    ("V", "W"): -8, ("W", "W"): 12, ("M", "H"): -4, ("P", "N"): -2,
+    ("K", "A"): -2, ("M", "L"): 3, ("K", "E"): -1, ("Z", "E"): 4,
+    ("X", "N"): -1, ("Z", "A"): -1, ("Z", "M"): -2, ("X", "F"): -3,
+    ("K", "C"): -7, ("B", "Q"): 0, ("X", "B"): -1, ("B", "M"): -4,
+    ("F", "C"): -6, ("Z", "Q"): 4, ("X", "Z"): -1, ("F", "G"): -5,
+    ("B", "E"): 3, ("X", "V"): -1, ("F", "K"): -7, ("B", "A"): 0,
+    ("X", "R"): -2, ("D", "D"): 5, ("W", "G"): -8, ("Z", "F"): -6,
+    ("S", "Q"): -2, ("W", "C"): -8, ("W", "K"): -5, ("H", "Q"): 3,
+    ("L", "C"): -7, ("W", "N"): -4, ("S", "A"): 1, ("L", "G"): -5,
+    ("W", "S"): -2, ("S", "E"): -1, ("H", "E"): -1, ("S", "I"): -2,
+    ("H", "A"): -3, ("S", "M"): -2, ("Y", "L"): -2, ("Y", "H"): -1,
+    ("Y", "D"): -5, ("E", "R"): -3, ("X", "P"): -2, ("G", "G"): 5,
+    ("G", "C"): -4, ("E", "N"): 1, ("Y", "T"): -3, ("Y", "P"): -6,
+    ("T", "K"): -1, ("A", "A"): 3, ("P", "Q"): 0, ("T", "C"): -3,
+    ("V", "H"): -3, ("T", "G"): -1, ("I", "Q"): -3, ("Z", "T"): -2,
+    ("C", "R"): -4, ("V", "P"): -2, ("P", "E"): -2, ("M", "C"): -6,
+    ("K", "N"): 1, ("I", "I"): 6, ("P", "A"): 1, ("M", "G"): -4,
+    ("T", "S"): 2, ("I", "E"): -3, ("P", "M"): -3, ("M", "K"): 0,
+    ("I", "A"): -1, ("P", "I"): -3, ("R", "R"): 6, ("X", "M"): -2,
+    ("L", "I"): 1, ("X", "I"): -1, ("Z", "B"): 2, ("X", "E"): -1,
+    ("Z", "N"): 0, ("X", "A"): -1, ("B", "R"): -2, ("B", "N"): 3,
+    ("F", "D"): -7, ("X", "Y"): -3, ("Z", "R"): -1, ("F", "H"): -3,
+    ("B", "F"): -5, ("F", "L"): 0, ("X", "Q"): -1, ("B", "B"): 4
+}
+
+
+# http://www.embl-heidelberg.de/~vogt/matrices/pam180.cmp
+pam180 = {
+    ("W", "F"): 0, ("L", "R"): -4, ("S", "P"): 1, ("V", "T"): 0,
+    ("Q", "Q"): 6, ("N", "A"): 0, ("Z", "Y"): -6, ("W", "R"): 2,
+    ("Q", "A"): -1, ("S", "D"): 0, ("H", "H"): 8, ("S", "H"): -2,
+    ("H", "D"): 0, ("L", "N"): -4, ("W", "A"): -8, ("Y", "M"): -4,
+    ("G", "R"): -4, ("Y", "I"): -2, ("Y", "E"): -6, ("B", "Y"): -4,
+    ("Y", "A"): -5, ("V", "D"): -3, ("B", "S"): 1, ("Y", "Y"): 11,
+    ("G", "N"): 0, ("E", "C"): -7, ("Y", "Q"): -6, ("Z", "Z"): 5,
+    ("V", "A"): 0, ("C", "C"): 13, ("M", "R"): -1, ("V", "E"): -3,
+    ("T", "N"): 0, ("P", "P"): 8, ("V", "I"): 5, ("V", "S"): -2,
+    ("Z", "P"): -1, ("V", "M"): 2, ("T", "F"): -4, ("V", "Q"): -3,
+    ("K", "K"): 6, ("P", "D"): -2, ("I", "H"): -4, ("I", "D"): -3,
+    ("T", "R"): -2, ("P", "L"): -4, ("K", "G"): -3, ("M", "N"): -3,
+    ("P", "H"): -1, ("F", "Q"): -6, ("Z", "G"): -1, ("X", "L"): -2,
+    ("T", "M"): -1, ("Z", "C"): -7, ("X", "H"): -1, ("D", "R"): -3,
+    ("B", "W"): -7, ("X", "D"): -1, ("Z", "K"): 0, ("F", "A"): -5,
+    ("Z", "W"): -8, ("F", "E"): -7, ("D", "N"): 3, ("B", "K"): 0,
+    ("X", "X"): -1, ("F", "I"): 1, ("B", "G"): 0, ("X", "T"): -1,
+    ("F", "M"): 0, ("B", "C"): -6, ("Z", "I"): -3, ("Z", "V"): -3,
+    ("S", "S"): 3, ("L", "Q"): -2, ("W", "E"): -9, ("Q", "R"): 1,
+    ("N", "N"): 4, ("W", "M"): -6, ("Q", "C"): -7, ("W", "I"): -7,
+    ("S", "C"): 0, ("L", "A"): -3, ("S", "G"): 1, ("L", "E"): -5,
+    ("W", "Q"): -6, ("H", "G"): -3, ("S", "K"): -1, ("Q", "N"): 0,
+    ("N", "R"): -1, ("H", "C"): -4, ("Y", "N"): -2, ("G", "Q"): -2,
+    ("Y", "F"): 7, ("C", "A"): -3, ("V", "L"): 2, ("G", "E"): 0,
+    ("G", "A"): 1, ("K", "R"): 4, ("E", "D"): 4, ("Y", "R"): -6,
+    ("M", "Q"): -1, ("T", "I"): 0, ("C", "D"): -7, ("V", "F"): -2,
+    ("T", "A"): 2, ("T", "P"): 0, ("B", "P"): -2, ("T", "E"): -1,
+    ("V", "N"): -3, ("P", "G"): -1, ("M", "A"): -2, ("K", "H"): -1,
+    ("V", "R"): -4, ("P", "C"): -4, ("M", "E"): -3, ("K", "L"): -4,
+    ("V", "V"): 6, ("M", "I"): 2, ("T", "Q"): -2, ("I", "G"): -4,
+    ("P", "K"): -2, ("M", "M"): 9, ("K", "D"): 0, ("I", "C"): -3,
+    ("Z", "D"): 3, ("F", "R"): -6, ("X", "K"): -1, ("Q", "D"): 2,
+    ("X", "G"): -2, ("Z", "L"): -3, ("X", "C"): -4, ("Z", "H"): 2,
+    ("B", "L"): -5, ("B", "H"): 1, ("F", "F"): 10, ("X", "W"): -6,
+    ("B", "D"): 4, ("D", "A"): 0, ("S", "L"): -4, ("X", "S"): 0,
+    ("F", "N"): -5, ("S", "R"): -1, ("W", "D"): -9, ("V", "Y"): -4,
+    ("W", "L"): -3, ("H", "R"): 2, ("W", "H"): -4, ("H", "N"): 2,
+    ("W", "T"): -7, ("T", "T"): 4, ("S", "F"): -4, ("W", "P"): -7,
+    ("L", "D"): -6, ("B", "I"): -3, ("L", "H"): -3, ("S", "N"): 1,
+    ("B", "T"): 0, ("L", "L"): 7, ("Y", "K"): -6, ("E", "Q"): 3,
+    ("Y", "G"): -7, ("Z", "S"): -1, ("Y", "C"): 0, ("G", "D"): 0,
+    ("B", "V"): -3, ("E", "A"): 0, ("Y", "W"): -1, ("E", "E"): 5,
+    ("Y", "S"): -4, ("C", "N"): -5, ("V", "C"): -3, ("T", "H"): -2,
+    ("P", "R"): -1, ("V", "G"): -2, ("T", "L"): -3, ("V", "K"): -4,
+    ("K", "Q"): 0, ("R", "A"): -3, ("I", "R"): -3, ("T", "D"): -1,
+    ("P", "F"): -6, ("I", "N"): -3, ("K", "I"): -3, ("M", "D"): -4,
+    ("V", "W"): -8, ("W", "W"): 18, ("M", "H"): -3, ("P", "N"): -1,
+    ("K", "A"): -2, ("M", "L"): 4, ("K", "E"): -1, ("Z", "E"): 5,
+    ("X", "N"): -1, ("Z", "A"): 0, ("Z", "M"): -2, ("X", "F"): -3,
+    ("K", "C"): -7, ("B", "Q"): 1, ("X", "B"): -1, ("B", "M"): -3,
+    ("F", "C"): -6, ("Z", "Q"): 5, ("X", "Z"): -1, ("F", "G"): -6,
+    ("B", "E"): 3, ("X", "V"): -1, ("F", "K"): -7, ("B", "A"): 0,
+    ("X", "R"): -2, ("D", "D"): 5, ("W", "G"): -9, ("Z", "F"): -7,
+    ("S", "Q"): -1, ("W", "C"): -10, ("W", "K"): -5, ("H", "Q"): 4,
+    ("L", "C"): -8, ("W", "N"): -5, ("S", "A"): 1, ("L", "G"): -6,
+    ("W", "S"): -3, ("S", "E"): -1, ("H", "E"): 0, ("S", "I"): -2,
+    ("H", "A"): -2, ("S", "M"): -2, ("Y", "L"): -2, ("Y", "H"): 0,
+    ("Y", "D"): -6, ("E", "R"): -2, ("X", "P"): -1, ("G", "G"): 6,
+    ("G", "C"): -5, ("E", "N"): 2, ("Y", "T"): -4, ("Y", "P"): -7,
+    ("T", "K"): 0, ("A", "A"): 3, ("P", "Q"): 0, ("T", "C"): -3,
+    ("V", "H"): -3, ("T", "G"): -1, ("I", "Q"): -3, ("Z", "T"): -1,
+    ("C", "R"): -5, ("V", "P"): -2, ("P", "E"): -1, ("M", "C"): -7,
+    ("K", "N"): 1, ("I", "I"): 6, ("P", "A"): 1, ("M", "G"): -4,
+    ("T", "S"): 2, ("I", "E"): -3, ("P", "M"): -3, ("M", "K"): 1,
+    ("I", "A"): -1, ("P", "I"): -3, ("R", "R"): 8, ("X", "M"): -1,
+    ("L", "I"): 2, ("X", "I"): -1, ("Z", "B"): 3, ("X", "E"): -1,
+    ("Z", "N"): 1, ("X", "A"): -1, ("B", "R"): -2, ("B", "N"): 3,
+    ("F", "D"): -8, ("X", "Y"): -3, ("Z", "R"): 0, ("F", "H"): -3,
+    ("B", "F"): -6, ("F", "L"): 1, ("X", "Q"): -1, ("B", "B"): 4
+}
+
+
+# http://www.embl-heidelberg.de/~vogt/matrices/pam250.cmp
+pam250 = {
+    ("W", "F"): 0, ("L", "R"): -3, ("S", "P"): 1, ("V", "T"): 0,
+    ("Q", "Q"): 4, ("N", "A"): 0, ("Z", "Y"): -4, ("W", "R"): 2,
+    ("Q", "A"): 0, ("S", "D"): 0, ("H", "H"): 6, ("S", "H"): -1,
+    ("H", "D"): 1, ("L", "N"): -3, ("W", "A"): -6, ("Y", "M"): -2,
+    ("G", "R"): -3, ("Y", "I"): -1, ("Y", "E"): -4, ("B", "Y"): -3,
+    ("Y", "A"): -3, ("V", "D"): -2, ("B", "S"): 0, ("Y", "Y"): 10,
+    ("G", "N"): 0, ("E", "C"): -5, ("Y", "Q"): -4, ("Z", "Z"): 3,
+    ("V", "A"): 0, ("C", "C"): 12, ("M", "R"): 0, ("V", "E"): -2,
+    ("T", "N"): 0, ("P", "P"): 6, ("V", "I"): 4, ("V", "S"): -1,
+    ("Z", "P"): 0, ("V", "M"): 2, ("T", "F"): -3, ("V", "Q"): -2,
+    ("K", "K"): 5, ("P", "D"): -1, ("I", "H"): -2, ("I", "D"): -2,
+    ("T", "R"): -1, ("P", "L"): -3, ("K", "G"): -2, ("M", "N"): -2,
+    ("P", "H"): 0, ("F", "Q"): -5, ("Z", "G"): 0, ("X", "L"): -1,
+    ("T", "M"): -1, ("Z", "C"): -5, ("X", "H"): -1, ("D", "R"): -1,
+    ("B", "W"): -5, ("X", "D"): -1, ("Z", "K"): 0, ("F", "A"): -3,
+    ("Z", "W"): -6, ("F", "E"): -5, ("D", "N"): 2, ("B", "K"): 1,
+    ("X", "X"): -1, ("F", "I"): 1, ("B", "G"): 0, ("X", "T"): 0,
+    ("F", "M"): 0, ("B", "C"): -4, ("Z", "I"): -2, ("Z", "V"): -2,
+    ("S", "S"): 2, ("L", "Q"): -2, ("W", "E"): -7, ("Q", "R"): 1,
+    ("N", "N"): 2, ("W", "M"): -4, ("Q", "C"): -5, ("W", "I"): -5,
+    ("S", "C"): 0, ("L", "A"): -2, ("S", "G"): 1, ("L", "E"): -3,
+    ("W", "Q"): -5, ("H", "G"): -2, ("S", "K"): 0, ("Q", "N"): 1,
+    ("N", "R"): 0, ("H", "C"): -3, ("Y", "N"): -2, ("G", "Q"): -1,
+    ("Y", "F"): 7, ("C", "A"): -2, ("V", "L"): 2, ("G", "E"): 0,
+    ("G", "A"): 1, ("K", "R"): 3, ("E", "D"): 3, ("Y", "R"): -4,
+    ("M", "Q"): -1, ("T", "I"): 0, ("C", "D"): -5, ("V", "F"): -1,
+    ("T", "A"): 1, ("T", "P"): 0, ("B", "P"): -1, ("T", "E"): 0,
+    ("V", "N"): -2, ("P", "G"): 0, ("M", "A"): -1, ("K", "H"): 0,
+    ("V", "R"): -2, ("P", "C"): -3, ("M", "E"): -2, ("K", "L"): -3,
+    ("V", "V"): 4, ("M", "I"): 2, ("T", "Q"): -1, ("I", "G"): -3,
+    ("P", "K"): -1, ("M", "M"): 6, ("K", "D"): 0, ("I", "C"): -2,
+    ("Z", "D"): 3, ("F", "R"): -4, ("X", "K"): -1, ("Q", "D"): 2,
+    ("X", "G"): -1, ("Z", "L"): -3, ("X", "C"): -3, ("Z", "H"): 2,
+    ("B", "L"): -3, ("B", "H"): 1, ("F", "F"): 9, ("X", "W"): -4,
+    ("B", "D"): 3, ("D", "A"): 0, ("S", "L"): -3, ("X", "S"): 0,
+    ("F", "N"): -3, ("S", "R"): 0, ("W", "D"): -7, ("V", "Y"): -2,
+    ("W", "L"): -2, ("H", "R"): 2, ("W", "H"): -3, ("H", "N"): 2,
+    ("W", "T"): -5, ("T", "T"): 3, ("S", "F"): -3, ("W", "P"): -6,
+    ("L", "D"): -4, ("B", "I"): -2, ("L", "H"): -2, ("S", "N"): 1,
+    ("B", "T"): 0, ("L", "L"): 6, ("Y", "K"): -4, ("E", "Q"): 2,
+    ("Y", "G"): -5, ("Z", "S"): 0, ("Y", "C"): 0, ("G", "D"): 1,
+    ("B", "V"): -2, ("E", "A"): 0, ("Y", "W"): 0, ("E", "E"): 4,
+    ("Y", "S"): -3, ("C", "N"): -4, ("V", "C"): -2, ("T", "H"): -1,
+    ("P", "R"): 0, ("V", "G"): -1, ("T", "L"): -2, ("V", "K"): -2,
+    ("K", "Q"): 1, ("R", "A"): -2, ("I", "R"): -2, ("T", "D"): 0,
+    ("P", "F"): -5, ("I", "N"): -2, ("K", "I"): -2, ("M", "D"): -3,
+    ("V", "W"): -6, ("W", "W"): 17, ("M", "H"): -2, ("P", "N"): 0,
+    ("K", "A"): -1, ("M", "L"): 4, ("K", "E"): 0, ("Z", "E"): 3,
+    ("X", "N"): 0, ("Z", "A"): 0, ("Z", "M"): -2, ("X", "F"): -2,
+    ("K", "C"): -5, ("B", "Q"): 1, ("X", "B"): -1, ("B", "M"): -2,
+    ("F", "C"): -4, ("Z", "Q"): 3, ("X", "Z"): -1, ("F", "G"): -5,
+    ("B", "E"): 3, ("X", "V"): -1, ("F", "K"): -5, ("B", "A"): 0,
+    ("X", "R"): -1, ("D", "D"): 4, ("W", "G"): -7, ("Z", "F"): -5,
+    ("S", "Q"): -1, ("W", "C"): -8, ("W", "K"): -3, ("H", "Q"): 3,
+    ("L", "C"): -6, ("W", "N"): -4, ("S", "A"): 1, ("L", "G"): -4,
+    ("W", "S"): -2, ("S", "E"): 0, ("H", "E"): 1, ("S", "I"): -1,
+    ("H", "A"): -1, ("S", "M"): -2, ("Y", "L"): -1, ("Y", "H"): 0,
+    ("Y", "D"): -4, ("E", "R"): -1, ("X", "P"): -1, ("G", "G"): 5,
+    ("G", "C"): -3, ("E", "N"): 1, ("Y", "T"): -3, ("Y", "P"): -5,
+    ("T", "K"): 0, ("A", "A"): 2, ("P", "Q"): 0, ("T", "C"): -2,
+    ("V", "H"): -2, ("T", "G"): 0, ("I", "Q"): -2, ("Z", "T"): -1,
+    ("C", "R"): -4, ("V", "P"): -1, ("P", "E"): -1, ("M", "C"): -5,
+    ("K", "N"): 1, ("I", "I"): 5, ("P", "A"): 1, ("M", "G"): -3,
+    ("T", "S"): 1, ("I", "E"): -2, ("P", "M"): -2, ("M", "K"): 0,
+    ("I", "A"): -1, ("P", "I"): -2, ("R", "R"): 6, ("X", "M"): -1,
+    ("L", "I"): 2, ("X", "I"): -1, ("Z", "B"): 2, ("X", "E"): -1,
+    ("Z", "N"): 1, ("X", "A"): 0, ("B", "R"): -1, ("B", "N"): 2,
+    ("F", "D"): -6, ("X", "Y"): -2, ("Z", "R"): 0, ("F", "H"): -2,
+    ("B", "F"): -4, ("F", "L"): 2, ("X", "Q"): -1, ("B", "B"): 3
+}
+
+
+# http://www.embl-heidelberg.de/~vogt/matrices/pam30.cmp
+pam30 = {
+    ("W", "F"): -4, ("L", "R"): -8, ("S", "P"): -2, ("V", "T"): -3,
+    ("Q", "Q"): 8, ("N", "A"): -4, ("Z", "Y"): -9, ("W", "R"): -2,
+    ("Q", "A"): -4, ("S", "D"): -4, ("H", "H"): 9, ("S", "H"): -6,
+    ("H", "D"): -4, ("L", "N"): -7, ("W", "A"): -13, ("Y", "M"): -11,
+    ("G", "R"): -9, ("Y", "I"): -6, ("Y", "E"): -8, ("B", "Y"): -6,
+    ("Y", "A"): -8, ("V", "D"): -8, ("B", "S"): -1, ("Y", "Y"): 10,
+    ("G", "N"): -3, ("E", "C"): -14, ("Y", "Q"): -12, ("Z", "Z"): 6,
+    ("V", "A"): -2, ("C", "C"): 10, ("M", "R"): -4, ("V", "E"): -6,
+    ("T", "N"): -2, ("P", "P"): 8, ("V", "I"): 2, ("V", "S"): -6,
+    ("Z", "P"): -4, ("V", "M"): -1, ("T", "F"): -9, ("V", "Q"): -7,
+    ("K", "K"): 7, ("P", "D"): -8, ("I", "H"): -9, ("I", "D"): -7,
+    ("T", "R"): -6, ("P", "L"): -7, ("K", "G"): -7, ("M", "N"): -9,
+    ("P", "H"): -4, ("F", "Q"): -13, ("Z", "G"): -5, ("X", "L"): -6,
+    ("T", "M"): -4, ("Z", "C"): -14, ("X", "H"): -5, ("D", "R"): -10,
+    ("B", "W"): -10, ("X", "D"): -5, ("Z", "K"): -4, ("F", "A"): -8,
+    ("Z", "W"): -14, ("F", "E"): -14, ("D", "N"): 2, ("B", "K"): -2,
+    ("X", "X"): -5, ("F", "I"): -2, ("B", "G"): -3, ("X", "T"): -4,
+    ("F", "M"): -4, ("B", "C"): -12, ("Z", "I"): -6, ("Z", "V"): -6,
+    ("S", "S"): 6, ("L", "Q"): -5, ("W", "E"): -17, ("Q", "R"): -2,
+    ("N", "N"): 8, ("W", "M"): -13, ("Q", "C"): -14, ("W", "I"): -14,
+    ("S", "C"): -3, ("L", "A"): -6, ("S", "G"): -2, ("L", "E"): -9,
+    ("W", "Q"): -13, ("H", "G"): -9, ("S", "K"): -4, ("Q", "N"): -3,
+    ("N", "R"): -6, ("H", "C"): -7, ("Y", "N"): -4, ("G", "Q"): -7,
+    ("Y", "F"): 2, ("C", "A"): -6, ("V", "L"): -2, ("G", "E"): -4,
+    ("G", "A"): -2, ("K", "R"): 0, ("E", "D"): 2, ("Y", "R"): -10,
+    ("M", "Q"): -4, ("T", "I"): -2, ("C", "D"): -14, ("V", "F"): -8,
+    ("T", "A"): -1, ("T", "P"): -4, ("B", "P"): -7, ("T", "E"): -6,
+    ("V", "N"): -8, ("P", "G"): -6, ("M", "A"): -5, ("K", "H"): -6,
+    ("V", "R"): -8, ("P", "C"): -8, ("M", "E"): -7, ("K", "L"): -8,
+    ("V", "V"): 7, ("M", "I"): -1, ("T", "Q"): -5, ("I", "G"): -11,
+    ("P", "K"): -6, ("M", "M"): 11, ("K", "D"): -4, ("I", "C"): -6,
+    ("Z", "D"): 1, ("F", "R"): -9, ("X", "K"): -5, ("Q", "D"): -2,
+    ("X", "G"): -5, ("Z", "L"): -7, ("X", "C"): -9, ("Z", "H"): -1,
+    ("B", "L"): -9, ("B", "H"): -1, ("F", "F"): 9, ("X", "W"): -11,
+    ("B", "D"): 6, ("D", "A"): -3, ("S", "L"): -8, ("X", "S"): -3,
+    ("F", "N"): -9, ("S", "R"): -3, ("W", "D"): -15, ("V", "Y"): -7,
+    ("W", "L"): -6, ("H", "R"): -2, ("W", "H"): -7, ("H", "N"): 0,
+    ("W", "T"): -13, ("T", "T"): 7, ("S", "F"): -6, ("W", "P"): -14,
+    ("L", "D"): -12, ("B", "I"): -6, ("L", "H"): -6, ("S", "N"): 0,
+    ("B", "T"): -3, ("L", "L"): 7, ("Y", "K"): -9, ("E", "Q"): 1,
+    ("Y", "G"): -14, ("Z", "S"): -5, ("Y", "C"): -4, ("G", "D"): -3,
+    ("B", "V"): -8, ("E", "A"): -2, ("Y", "W"): -5, ("E", "E"): 8,
+    ("Y", "S"): -7, ("C", "N"): -11, ("V", "C"): -6, ("T", "H"): -7,
+    ("P", "R"): -4, ("V", "G"): -5, ("T", "L"): -7, ("V", "K"): -9,
+    ("K", "Q"): -3, ("R", "A"): -7, ("I", "R"): -5, ("T", "D"): -5,
+    ("P", "F"): -10, ("I", "N"): -5, ("K", "I"): -6, ("M", "D"): -11,
+    ("V", "W"): -15, ("W", "W"): 13, ("M", "H"): -10, ("P", "N"): -6,
+    ("K", "A"): -7, ("M", "L"): 1, ("K", "E"): -4, ("Z", "E"): 6,
+    ("X", "N"): -3, ("Z", "A"): -3, ("Z", "M"): -5, ("X", "F"): -8,
+    ("K", "C"): -14, ("B", "Q"): -3, ("X", "B"): -5, ("B", "M"): -10,
+    ("F", "C"): -13, ("Z", "Q"): 6, ("X", "Z"): -5, ("F", "G"): -9,
+    ("B", "E"): 1, ("X", "V"): -5, ("F", "K"): -14, ("B", "A"): -3,
+    ("X", "R"): -6, ("D", "D"): 8, ("W", "G"): -15, ("Z", "F"): -13,
+    ("S", "Q"): -5, ("W", "C"): -15, ("W", "K"): -12, ("H", "Q"): 1,
+    ("L", "C"): -15, ("W", "N"): -8, ("S", "A"): 0, ("L", "G"): -10,
+    ("W", "S"): -5, ("S", "E"): -4, ("H", "E"): -5, ("S", "I"): -7,
+    ("H", "A"): -7, ("S", "M"): -5, ("Y", "L"): -7, ("Y", "H"): -3,
+    ("Y", "D"): -11, ("E", "R"): -9, ("X", "P"): -5, ("G", "G"): 6,
+    ("G", "C"): -9, ("E", "N"): -2, ("Y", "T"): -6, ("Y", "P"): -13,
+    ("T", "K"): -3, ("A", "A"): 6, ("P", "Q"): -3, ("T", "C"): -8,
+    ("V", "H"): -6, ("T", "G"): -6, ("I", "Q"): -8, ("Z", "T"): -6,
+    ("C", "R"): -8, ("V", "P"): -6, ("P", "E"): -5, ("M", "C"): -13,
+    ("K", "N"): -1, ("I", "I"): 8, ("P", "A"): -2, ("M", "G"): -8,
+    ("T", "S"): 0, ("I", "E"): -5, ("P", "M"): -8, ("M", "K"): -2,
+    ("I", "A"): -5, ("P", "I"): -8, ("R", "R"): 8, ("X", "M"): -5,
+    ("L", "I"): -1, ("X", "I"): -5, ("Z", "B"): 0, ("X", "E"): -5,
+    ("Z", "N"): -3, ("X", "A"): -3, ("B", "R"): -7, ("B", "N"): 6,
+    ("F", "D"): -15, ("X", "Y"): -7, ("Z", "R"): -4, ("F", "H"): -6,
+    ("B", "F"): -10, ("F", "L"): -3, ("X", "Q"): -5, ("B", "B"): 6
+}
+
+
+# http://www.embl-heidelberg.de/~vogt/matrices/pam300.cmp
+pam300 = {
+    ("W", "F"): 1, ("L", "R"): -3, ("S", "P"): 1, ("V", "T"): 0,
+    ("Q", "Q"): 4, ("N", "A"): 0, ("Z", "Y"): -5, ("W", "R"): 3,
+    ("Q", "A"): 0, ("S", "D"): 0, ("H", "H"): 7, ("S", "H"): -1,
+    ("H", "D"): 1, ("L", "N"): -3, ("W", "A"): -6, ("Y", "M"): -2,
+    ("G", "R"): -2, ("Y", "I"): -1, ("Y", "E"): -5, ("B", "Y"): -4,
+    ("Y", "A"): -4, ("V", "D"): -2, ("B", "S"): 1, ("Y", "Y"): 12,
+    ("G", "N"): 1, ("E", "C"): -6, ("Y", "Q"): -4, ("Z", "Z"): 3,
+    ("V", "A"): 0, ("C", "C"): 15, ("M", "R"): 0, ("V", "E"): -2,
+    ("T", "N"): 0, ("P", "P"): 6, ("V", "I"): 4, ("V", "S"): -1,
+    ("Z", "P"): 0, ("V", "M"): 2, ("T", "F"): -3, ("V", "Q"): -2,
+    ("K", "K"): 5, ("P", "D"): -1, ("I", "H"): -2, ("I", "D"): -2,
+    ("T", "R"): -1, ("P", "L"): -3, ("K", "G"): -2, ("M", "N"): -2,
+    ("P", "H"): 0, ("F", "Q"): -5, ("Z", "G"): 0, ("X", "L"): -1,
+    ("T", "M"): -1, ("Z", "C"): -6, ("X", "H"): 0, ("D", "R"): -1,
+    ("B", "W"): -6, ("X", "D"): -1, ("Z", "K"): 1, ("F", "A"): -4,
+    ("Z", "W"): -6, ("F", "E"): -6, ("D", "N"): 2, ("B", "K"): 1,
+    ("X", "X"): -1, ("F", "I"): 1, ("B", "G"): 1, ("X", "T"): 0,
+    ("F", "M"): 1, ("B", "C"): -5, ("Z", "I"): -2, ("Z", "V"): -2,
+    ("S", "S"): 1, ("L", "Q"): -2, ("W", "E"): -8, ("Q", "R"): 2,
+    ("N", "N"): 2, ("W", "M"): -5, ("Q", "C"): -6, ("W", "I"): -6,
+    ("S", "C"): 0, ("L", "A"): -2, ("S", "G"): 1, ("L", "E"): -4,
+    ("W", "Q"): -5, ("H", "G"): -2, ("S", "K"): 0, ("Q", "N"): 1,
+    ("N", "R"): 0, ("H", "C"): -4, ("Y", "N"): -2, ("G", "Q"): -1,
+    ("Y", "F"): 9, ("C", "A"): -2, ("V", "L"): 2, ("G", "E"): 0,
+    ("G", "A"): 2, ("K", "R"): 4, ("E", "D"): 4, ("Y", "R"): -5,
+    ("M", "Q"): -1, ("T", "I"): 0, ("C", "D"): -6, ("V", "F"): -1,
+    ("T", "A"): 1, ("T", "P"): 1, ("B", "P"): 0, ("T", "E"): 0,
+    ("V", "N"): -2, ("P", "G"): 0, ("M", "A"): -1, ("K", "H"): 0,
+    ("V", "R"): -3, ("P", "C"): -3, ("M", "E"): -2, ("K", "L"): -3,
+    ("V", "V"): 5, ("M", "I"): 3, ("T", "Q"): -1, ("I", "G"): -3,
+    ("P", "K"): -1, ("M", "M"): 6, ("K", "D"): 0, ("I", "C"): -3,
+    ("Z", "D"): 3, ("F", "R"): -5, ("X", "K"): -1, ("Q", "D"): 2,
+    ("X", "G"): -1, ("Z", "L"): -3, ("X", "C"): -3, ("Z", "H"): 2,
+    ("B", "L"): -4, ("B", "H"): 1, ("F", "F"): 11, ("X", "W"): -4,
+    ("B", "D"): 3, ("D", "A"): 0, ("S", "L"): -3, ("X", "S"): 0,
+    ("F", "N"): -4, ("S", "R"): 0, ("W", "D"): -7, ("V", "Y"): -3,
+    ("W", "L"): -2, ("H", "R"): 2, ("W", "H"): -3, ("H", "N"): 2,
+    ("W", "T"): -6, ("T", "T"): 2, ("S", "F"): -4, ("W", "P"): -6,
+    ("L", "D"): -4, ("B", "I"): -2, ("L", "H"): -2, ("S", "N"): 1,
+    ("B", "T"): 0, ("L", "L"): 7, ("Y", "K"): -5, ("E", "Q"): 3,
+    ("Y", "G"): -6, ("Z", "S"): 0, ("Y", "C"): 1, ("G", "D"): 1,
+    ("B", "V"): -2, ("E", "A"): 0, ("Y", "W"): 0, ("E", "E"): 4,
+    ("Y", "S"): -3, ("C", "N"): -4, ("V", "C"): -2, ("T", "H"): -1,
+    ("P", "R"): 0, ("V", "G"): -1, ("T", "L"): -2, ("V", "K"): -2,
+    ("K", "Q"): 1, ("R", "A"): -1, ("I", "R"): -2, ("T", "D"): 0,
+    ("P", "F"): -5, ("I", "N"): -2, ("K", "I"): -2, ("M", "D"): -3,
+    ("V", "W"): -7, ("W", "W"): 22, ("M", "H"): -2, ("P", "N"): 0,
+    ("K", "A"): -1, ("M", "L"): 4, ("K", "E"): 0, ("Z", "E"): 3,
+    ("X", "N"): 0, ("Z", "A"): 0, ("Z", "M"): -2, ("X", "F"): -2,
+    ("K", "C"): -6, ("B", "Q"): 2, ("X", "B"): 0, ("B", "M"): -2,
+    ("F", "C"): -5, ("Z", "Q"): 3, ("X", "Z"): -1, ("F", "G"): -5,
+    ("B", "E"): 3, ("X", "V"): 0, ("F", "K"): -6, ("B", "A"): 0,
+    ("X", "R"): -1, ("D", "D"): 4, ("W", "G"): -8, ("Z", "F"): -5,
+    ("S", "Q"): 0, ("W", "C"): -9, ("W", "K"): -4, ("H", "Q"): 3,
+    ("L", "C"): -7, ("W", "N"): -5, ("S", "A"): 1, ("L", "G"): -4,
+    ("W", "S"): -3, ("S", "E"): 0, ("H", "E"): 1, ("S", "I"): -1,
+    ("H", "A"): -1, ("S", "M"): -2, ("Y", "L"): 0, ("Y", "H"): 0,
+    ("Y", "D"): -5, ("E", "R"): -1, ("X", "P"): -1, ("G", "G"): 5,
+    ("G", "C"): -4, ("E", "N"): 2, ("Y", "T"): -3, ("Y", "P"): -5,
+    ("T", "K"): 0, ("A", "A"): 2, ("P", "Q"): 0, ("T", "C"): -2,
+    ("V", "H"): -2, ("T", "G"): 0, ("I", "Q"): -2, ("Z", "T"): 0,
+    ("C", "R"): -4, ("V", "P"): -1, ("P", "E"): 0, ("M", "C"): -6,
+    ("K", "N"): 1, ("I", "I"): 5, ("P", "A"): 1, ("M", "G"): -3,
+    ("T", "S"): 1, ("I", "E"): -2, ("P", "M"): -2, ("M", "K"): 0,
+    ("I", "A"): 0, ("P", "I"): -2, ("R", "R"): 7, ("X", "M"): -1,
+    ("L", "I"): 3, ("X", "I"): -1, ("Z", "B"): 2, ("X", "E"): -1,
+    ("Z", "N"): 1, ("X", "A"): 0, ("B", "R"): 0, ("B", "N"): 2,
+    ("F", "D"): -6, ("X", "Y"): -2, ("Z", "R"): 0, ("F", "H"): -2,
+    ("B", "F"): -5, ("F", "L"): 3, ("X", "Q"): 0, ("B", "B"): 3
+}
+
+
+# http://www.embl-heidelberg.de/~vogt/matrices/pam60.cmp
+pam60 = {
+    ("W", "F"): -3, ("L", "R"): -6, ("S", "P"): 0, ("V", "T"): -1,
+    ("Q", "Q"): 7, ("N", "A"): -2, ("Z", "Y"): -7, ("W", "R"): 0,
+    ("Q", "A"): -3, ("S", "D"): -2, ("H", "H"): 8, ("S", "H"): -4,
+    ("H", "D"): -2, ("L", "N"): -5, ("W", "A"): -10, ("Y", "M"): -7,
+    ("G", "R"): -7, ("Y", "I"): -4, ("Y", "E"): -7, ("B", "Y"): -5,
+    ("Y", "A"): -6, ("V", "D"): -6, ("B", "S"): 0, ("Y", "Y"): 9,
+    ("G", "N"): -1, ("E", "C"): -10, ("Y", "Q"): -8, ("Z", "Z"): 5,
+    ("V", "A"): -1, ("C", "C"): 9, ("M", "R"): -2, ("V", "E"): -4,
+    ("T", "N"): -1, ("P", "P"): 7, ("V", "I"): 3, ("V", "S"): -4,
+    ("Z", "P"): -2, ("V", "M"): 0, ("T", "F"): -6, ("V", "Q"): -5,
+    ("K", "K"): 6, ("P", "D"): -5, ("I", "H"): -6, ("I", "D"): -5,
+    ("T", "R"): -4, ("P", "L"): -5, ("K", "G"): -5, ("M", "N"): -6,
+    ("P", "H"): -2, ("F", "Q"): -9, ("Z", "G"): -3, ("X", "L"): -4,
+    ("T", "M"): -2, ("Z", "C"): -10, ("X", "H"): -3, ("D", "R"): -6,
+    ("B", "W"): -8, ("X", "D"): -3, ("Z", "K"): -2, ("F", "A"): -6,
+    ("Z", "W"): -11, ("F", "E"): -10, ("D", "N"): 2, ("B", "K"): -1,
+    ("X", "X"): -3, ("F", "I"): -1, ("B", "G"): -2, ("X", "T"): -2,
+    ("F", "M"): -2, ("B", "C"): -9, ("Z", "I"): -4, ("Z", "V"): -5,
+    ("S", "S"): 5, ("L", "Q"): -3, ("W", "E"): -12, ("Q", "R"): 0,
+    ("N", "N"): 6, ("W", "M"): -9, ("Q", "C"): -10, ("W", "I"): -10,
+    ("S", "C"): -1, ("L", "A"): -4, ("S", "G"): 0, ("L", "E"): -7,
+    ("W", "Q"): -9, ("H", "G"): -6, ("S", "K"): -2, ("Q", "N"): -2,
+    ("N", "R"): -3, ("H", "C"): -6, ("Y", "N"): -3, ("G", "Q"): -5,
+    ("Y", "F"): 3, ("C", "A"): -5, ("V", "L"): -1, ("G", "E"): -2,
+    ("G", "A"): 0, ("K", "R"): 2, ("E", "D"): 3, ("Y", "R"): -8,
+    ("M", "Q"): -2, ("T", "I"): -1, ("C", "D"): -10, ("V", "F"): -5,
+    ("T", "A"): 1, ("T", "P"): -2, ("B", "P"): -4, ("T", "E"): -4,
+    ("V", "N"): -5, ("P", "G"): -4, ("M", "A"): -3, ("K", "H"): -4,
+    ("V", "R"): -5, ("P", "C"): -6, ("M", "E"): -5, ("K", "L"): -6,
+    ("V", "V"): 6, ("M", "I"): 1, ("T", "Q"): -4, ("I", "G"): -7,
+    ("P", "K"): -4, ("M", "M"): 10, ("K", "D"): -2, ("I", "C"): -4,
+    ("Z", "D"): 2, ("F", "R"): -7, ("X", "K"): -3, ("Q", "D"): -1,
+    ("X", "G"): -3, ("Z", "L"): -5, ("X", "C"): -6, ("Z", "H"): 0,
+    ("B", "L"): -7, ("B", "H"): 0, ("F", "F"): 8, ("X", "W"): -8,
+    ("B", "D"): 5, ("D", "A"): -2, ("S", "L"): -6, ("X", "S"): -2,
+    ("F", "N"): -6, ("S", "R"): -2, ("W", "D"): -11, ("V", "Y"): -5,
+    ("W", "L"): -4, ("H", "R"): 0, ("W", "H"): -5, ("H", "N"): 1,
+    ("W", "T"): -9, ("T", "T"): 6, ("S", "F"): -5, ("W", "P"): -10,
+    ("L", "D"): -9, ("B", "I"): -4, ("L", "H"): -4, ("S", "N"): 1,
+    ("B", "T"): -2, ("L", "L"): 6, ("Y", "K"): -7, ("E", "Q"): 2,
+    ("Y", "G"): -10, ("Z", "S"): -3, ("Y", "C"): -2, ("G", "D"): -2,
+    ("B", "V"): -5, ("E", "A"): -1, ("Y", "W"): -3, ("E", "E"): 7,
+    ("Y", "S"): -5, ("C", "N"): -7, ("V", "C"): -4, ("T", "H"): -5,
+    ("P", "R"): -2, ("V", "G"): -4, ("T", "L"): -5, ("V", "K"): -6,
+    ("K", "Q"): -1, ("R", "A"): -5, ("I", "R"): -4, ("T", "D"): -3,
+    ("P", "F"): -7, ("I", "N"): -4, ("K", "I"): -4, ("M", "D"): -7,
+    ("V", "W"): -11, ("W", "W"): 13, ("M", "H"): -7, ("P", "N"): -4,
+    ("K", "A"): -5, ("M", "L"): 2, ("K", "E"): -3, ("Z", "E"): 5,
+    ("X", "N"): -2, ("Z", "A"): -2, ("Z", "M"): -4, ("X", "F"): -5,
+    ("K", "C"): -10, ("B", "Q"): -1, ("X", "B"): -3, ("B", "M"): -6,
+    ("F", "C"): -9, ("Z", "Q"): 6, ("X", "Z"): -3, ("F", "G"): -7,
+    ("B", "E"): 2, ("X", "V"): -3, ("F", "K"): -10, ("B", "A"): -2,
+    ("X", "R"): -4, ("D", "D"): 7, ("W", "G"): -11, ("Z", "F"): -10,
+    ("S", "Q"): -3, ("W", "C"): -12, ("W", "K"): -8, ("H", "Q"): 2,
+    ("L", "C"): -11, ("W", "N"): -6, ("S", "A"): 1, ("L", "G"): -8,
+    ("W", "S"): -4, ("S", "E"): -2, ("H", "E"): -3, ("S", "I"): -4,
+    ("H", "A"): -5, ("S", "M"): -4, ("Y", "L"): -5, ("Y", "H"): -2,
+    ("Y", "D"): -8, ("E", "R"): -6, ("X", "P"): -3, ("G", "G"): 6,
+    ("G", "C"): -7, ("E", "N"): 0, ("Y", "T"): -5, ("Y", "P"): -10,
+    ("T", "K"): -2, ("A", "A"): 5, ("P", "Q"): -1, ("T", "C"): -5,
+    ("V", "H"): -5, ("T", "G"): -3, ("I", "Q"): -5, ("Z", "T"): -4,
+    ("C", "R"): -6, ("V", "P"): -4, ("P", "E"): -3, ("M", "C"): -10,
+    ("K", "N"): 0, ("I", "I"): 7, ("P", "A"): 0, ("M", "G"): -6,
+    ("T", "S"): 1, ("I", "E"): -4, ("P", "M"): -6, ("M", "K"): 0,
+    ("I", "A"): -3, ("P", "I"): -6, ("R", "R"): 8, ("X", "M"): -3,
+    ("L", "I"): 0, ("X", "I"): -3, ("Z", "B"): 1, ("X", "E"): -3,
+    ("Z", "N"): -1, ("X", "A"): -2, ("B", "R"): -5, ("B", "N"): 5,
+    ("F", "D"): -11, ("X", "Y"): -5, ("Z", "R"): -2, ("F", "H"): -4,
+    ("B", "F"): -8, ("F", "L"): -1, ("X", "Q"): -3, ("B", "B"): 5
+}
+
+
+# http://www.embl-heidelberg.de/~vogt/matrices/pam90.cmp
+pam90 = {
+    ("W", "F"): -2, ("L", "R"): -5, ("S", "P"): 0, ("V", "T"): -1,
+    ("Q", "Q"): 6, ("N", "A"): -1, ("Z", "Y"): -6, ("W", "R"): 0,
+    ("Q", "A"): -2, ("S", "D"): -1, ("H", "H"): 8, ("S", "H"): -3,
+    ("H", "D"): -1, ("L", "N"): -4, ("W", "A"): -8, ("Y", "M"): -6,
+    ("G", "R"): -5, ("Y", "I"): -3, ("Y", "E"): -6, ("B", "Y"): -4,
+    ("Y", "A"): -5, ("V", "D"): -4, ("B", "S"): 0, ("Y", "Y"): 9,
+    ("G", "N"): -1, ("E", "C"): -8, ("Y", "Q"): -6, ("Z", "Z"): 5,
+    ("V", "A"): 0, ("C", "C"): 9, ("M", "R"): -2, ("V", "E"): -3,
+    ("T", "N"): 0, ("P", "P"): 7, ("V", "I"): 3, ("V", "S"): -3,
+    ("Z", "P"): -2, ("V", "M"): 1, ("T", "F"): -5, ("V", "Q"): -4,
+    ("K", "K"): 5, ("P", "D"): -4, ("I", "H"): -5, ("I", "D"): -4,
+    ("T", "R"): -3, ("P", "L"): -4, ("K", "G"): -4, ("M", "N"): -4,
+    ("P", "H"): -2, ("F", "Q"): -7, ("Z", "G"): -2, ("T", "M"): -2,
+    ("Z", "C"): -8, ("D", "R"): -5, ("B", "W"): -7, ("Z", "K"): -1,
+    ("F", "A"): -5, ("Z", "W"): -8, ("F", "E"): -8, ("D", "N"): 3,
+    ("B", "K"): 0, ("F", "I"): 0, ("B", "G"): -1, ("F", "M"): -1,
+    ("B", "C"): -7, ("Z", "I"): -3, ("Z", "V"): -3, ("S", "S"): 4,
+    ("L", "Q"): -3, ("W", "E"): -10, ("Q", "R"): 0, ("N", "N"): 5,
+    ("W", "M"): -7, ("Q", "C"): -8, ("W", "I"): -8, ("S", "C"): -1,
+    ("L", "A"): -3, ("S", "G"): 0, ("L", "E"): -5, ("W", "Q"): -7,
+    ("H", "G"): -5, ("S", "K"): -1, ("L", "I"): 1, ("N", "R"): -2,
+    ("H", "C"): -5, ("Y", "N"): -2, ("G", "Q"): -3, ("Y", "F"): 4,
+    ("C", "A"): -3, ("V", "L"): 0, ("G", "E"): -1, ("G", "A"): 0,
+    ("K", "R"): 2, ("E", "D"): 4, ("Y", "R"): -6, ("M", "Q"): -2,
+    ("T", "I"): 0, ("C", "D"): -8, ("V", "F"): -4, ("T", "A"): 1,
+    ("T", "P"): -1, ("B", "P"): -3, ("T", "E"): -2, ("V", "N"): -4,
+    ("P", "G"): -3, ("M", "A"): -2, ("K", "H"): -2, ("V", "R"): -4,
+    ("P", "C"): -5, ("M", "E"): -4, ("K", "L"): -5, ("V", "V"): 6,
+    ("M", "I"): 1, ("T", "Q"): -3, ("I", "G"): -5, ("P", "K"): -3,
+    ("M", "M"): 9, ("K", "D"): -2, ("I", "C"): -3, ("Z", "D"): 3,
+    ("F", "R"): -6, ("Q", "D"): 0, ("Z", "L"): -4, ("Z", "H"): 1,
+    ("B", "L"): -5, ("B", "H"): 1, ("F", "F"): 8, ("B", "D"): 5,
+    ("D", "A"): -1, ("S", "L"): -5, ("F", "N"): -5, ("S", "R"): -1,
+    ("W", "D"): -9, ("W", "L"): -3, ("H", "R"): 1, ("W", "H"): -4,
+    ("H", "N"): 2, ("W", "T"): -7, ("T", "T"): 5, ("S", "F"): -4,
+    ("W", "P"): -8, ("L", "D"): -7, ("B", "I"): -3, ("L", "H"): -3,
+    ("S", "N"): 1, ("B", "T"): -1, ("L", "L"): 6, ("Y", "K"): -6,
+    ("E", "Q"): 2, ("Y", "G"): -8, ("Z", "S"): -2, ("Y", "C"): -1,
+    ("G", "D"): -1, ("B", "V"): -4, ("E", "A"): 0, ("Y", "W"): -2,
+    ("E", "E"): 6, ("Y", "S"): -4, ("C", "N"): -6, ("V", "C"): -3,
+    ("T", "H"): -3, ("P", "R"): -1, ("V", "G"): -3, ("T", "L"): -3,
+    ("V", "K"): -5, ("K", "Q"): -1, ("R", "A"): -4, ("I", "R"): -3,
+    ("T", "D"): -2, ("P", "F"): -6, ("I", "N"): -3, ("K", "I"): -3,
+    ("M", "D"): -5, ("V", "W"): -9, ("W", "W"): 13, ("M", "H"): -5,
+    ("P", "N"): -2, ("K", "A"): -3, ("M", "L"): 2, ("K", "E"): -2,
+    ("Z", "E"): 5, ("Q", "N"): -1, ("Z", "A"): -1, ("Z", "M"): -3,
+    ("K", "C"): -8, ("B", "Q"): 0, ("B", "M"): -5, ("F", "C"): -7,
+    ("Z", "Q"): 5, ("F", "G"): -6, ("B", "E"): 2, ("F", "K"): -8,
+    ("B", "A"): -1, ("D", "D"): 6, ("W", "G"): -9, ("S", "Q"): -2,
+    ("W", "C"): -10, ("W", "K"): -6, ("H", "Q"): 2, ("L", "C"): -9,
+    ("W", "N"): -5, ("S", "A"): 1, ("L", "G"): -6, ("W", "S"): -3,
+    ("S", "E"): -2, ("H", "E"): -1, ("S", "I"): -3, ("H", "A"): -4,
+    ("S", "M"): -3, ("Y", "L"): -3, ("Y", "H"): -1, ("Y", "D"): -6,
+    ("E", "R"): -4, ("G", "G"): 5, ("G", "C"): -5, ("E", "N"): 0,
+    ("Y", "T"): -4, ("Y", "P"): -8, ("T", "K"): -1, ("A", "A"): 4,
+    ("P", "Q"): -1, ("T", "C"): -4, ("V", "H"): -4, ("T", "G"): -2,
+    ("I", "Q"): -4, ("Z", "T"): -2, ("C", "R"): -5, ("V", "P"): -3,
+    ("P", "E"): -2, ("M", "C"): -8, ("K", "N"): 1, ("I", "I"): 6,
+    ("P", "A"): 0, ("M", "G"): -5, ("T", "S"): 2, ("I", "E"): -3,
+    ("P", "M"): -4, ("M", "K"): 0, ("I", "A"): -2, ("P", "I"): -4,
+    ("R", "R"): 7, ("Z", "F"): -8, ("Z", "B"): 2, ("Z", "N"): 0,
+    ("B", "R"): -3, ("B", "N"): 4, ("F", "D"): -8, ("Z", "R"): -1,
+    ("F", "H"): -3, ("B", "F"): -6, ("F", "L"): 0, ("V", "Y"): -4,
+    ("B", "B"): 4
+}
+
+
+# http://www.embl-heidelberg.de/~vogt/matrices/rao.cmp
+rao = {
+    ("W", "F"): 11, ("S", "P"): 10, ("N", "M"): 6, ("Q", "Q"): 16,
+    ("N", "A"): 9, ("N", "E"): 10, ("W", "V"): 11, ("Q", "E"): 11,
+    ("L", "H"): 10, ("W", "R"): 7, ("Q", "A"): 11, ("S", "D"): 10,
+    ("H", "H"): 16, ("Q", "M"): 9, ("S", "H"): 10, ("H", "D"): 9,
+    ("Q", "I"): 6, ("S", "L"): 8, ("Y", "M"): 8, ("Y", "I"): 10,
+    ("Y", "E"): 6, ("Y", "A"): 9, ("G", "F"): 7, ("V", "T"): 10,
+    ("Y", "Y"): 16, ("V", "H"): 9, ("E", "C"): 9, ("Y", "Q"): 8,
+    ("V", "A"): 9, ("C", "C"): 16, ("V", "E"): 4, ("T", "N"): 10,
+    ("R", "K"): 11, ("P", "P"): 16, ("V", "I"): 12, ("R", "G"): 7,
+    ("V", "M"): 9, ("T", "F"): 10, ("R", "C"): 8, ("V", "Q"): 6,
+    ("K", "K"): 16, ("P", "D"): 8, ("I", "H"): 8, ("M", "F"): 10,
+    ("I", "D"): 3, ("K", "C"): 9, ("P", "L"): 4, ("K", "G"): 7,
+    ("P", "H"): 5, ("T", "R"): 9, ("F", "A"): 10, ("F", "E"): 6,
+    ("S", "S"): 16, ("W", "E"): 7, ("N", "N"): 16, ("W", "M"): 10,
+    ("Q", "C"): 10, ("N", "F"): 6, ("S", "C"): 10, ("L", "A"): 11,
+    ("S", "G"): 11, ("L", "E"): 7, ("W", "Q"): 9, ("H", "G"): 7,
+    ("S", "K"): 10, ("Q", "N"): 11, ("V", "D"): 3, ("H", "C"): 10,
+    ("Y", "N"): 8, ("Y", "F"): 10, ("W", "I"): 11, ("C", "A"): 11,
+    ("G", "E"): 6, ("G", "A"): 8, ("Y", "V"): 10, ("E", "D"): 11,
+    ("W", "H"): 10, ("Y", "R"): 7, ("N", "I"): 5, ("R", "L"): 6,
+    ("T", "I"): 10, ("Q", "L"): 9, ("R", "H"): 10, ("T", "M"): 8,
+    ("V", "F"): 11, ("R", "D"): 10, ("T", "A"): 10, ("T", "P"): 8,
+    ("T", "E"): 8, ("V", "N"): 5, ("P", "G"): 11, ("M", "A"): 11,
+    ("K", "H"): 11, ("V", "R"): 5, ("P", "C"): 7, ("M", "E"): 8,
+    ("V", "V"): 16, ("T", "T"): 16, ("M", "I"): 9, ("T", "Q"): 10,
+    ("I", "G"): 6, ("P", "K"): 6, ("M", "M"): 16, ("K", "D"): 11,
+    ("I", "C"): 8, ("L", "C"): 11, ("F", "F"): 16, ("D", "A"): 9,
+    ("S", "R"): 9, ("W", "D"): 6, ("N", "C"): 9, ("N", "G"): 10,
+    ("W", "T"): 11, ("Q", "G"): 8, ("S", "F"): 8, ("W", "P"): 6,
+    ("L", "D"): 6, ("H", "F"): 9, ("Q", "K"): 12, ("S", "N"): 11,
+    ("L", "L"): 16, ("Q", "F"): 7, ("Y", "K"): 7, ("Y", "G"): 10,
+    ("Y", "C"): 10, ("G", "D"): 9, ("E", "A"): 10, ("Y", "W"): 11,
+    ("E", "E"): 16, ("Y", "S"): 11, ("R", "M"): 6, ("V", "C"): 8,
+    ("T", "H"): 10, ("R", "I"): 4, ("V", "G"): 6, ("T", "L"): 9,
+    ("R", "E"): 9, ("V", "K"): 5, ("R", "Q"): 10, ("R", "A"): 8,
+    ("T", "D"): 9, ("P", "F"): 4, ("V", "S"): 8, ("K", "I"): 4,
+    ("M", "D"): 5, ("W", "W"): 16, ("M", "H"): 10, ("P", "N"): 9,
+    ("I", "F"): 12, ("K", "A"): 10, ("M", "L"): 11, ("K", "E"): 11,
+    ("N", "K"): 11, ("R", "P"): 6, ("L", "F"): 11, ("F", "C"): 10,
+    ("W", "G"): 8, ("W", "L"): 11, ("D", "D"): 16, ("N", "H"): 10,
+    ("S", "Q"): 10, ("Q", "P"): 7, ("N", "L"): 7, ("W", "K"): 7,
+    ("Q", "D"): 11, ("W", "N"): 8, ("S", "A"): 10, ("L", "G"): 6,
+    ("W", "S"): 10, ("S", "E"): 9, ("L", "K"): 7, ("H", "E"): 11,
+    ("S", "I"): 8, ("Q", "H"): 11, ("H", "A"): 11, ("S", "M"): 7,
+    ("Y", "L"): 9, ("Y", "H"): 9, ("Y", "D"): 7, ("G", "G"): 16,
+    ("G", "C"): 8, ("Y", "T"): 11, ("W", "C"): 11, ("Y", "P"): 8,
+    ("T", "K"): 9, ("R", "N"): 10, ("A", "A"): 16, ("W", "A"): 11,
+    ("T", "C"): 10, ("N", "D"): 11, ("R", "F"): 5, ("T", "G"): 10,
+    ("V", "L"): 10, ("V", "P"): 3, ("P", "E"): 5, ("M", "C"): 10,
+    ("I", "I"): 16, ("P", "A"): 6, ("M", "G"): 4, ("T", "S"): 11,
+    ("I", "E"): 4, ("P", "M"): 2, ("M", "K"): 8, ("K", "F"): 6,
+    ("I", "A"): 9, ("P", "I"): 3, ("R", "R"): 16, ("L", "I"): 10,
+    ("F", "D"): 4, ("D", "C"): 8
+}
+
+
+# http://www.embl-heidelberg.de/~vogt/matrices/risler.cmp
+risler = {
+    ("W", "F"): -0.9, ("S", "P"): -0.3, ("N", "M"): 0.0, ("Q", "Q"): 2.2,
+    ("N", "A"): 1.3, ("N", "E"): 1.4, ("W", "V"): -0.7, ("Q", "E"): 2.1,
+    ("L", "H"): -0.9, ("W", "R"): -0.8, ("Q", "A"): 1.8, ("S", "D"): 0.7,
+    ("H", "H"): 2.2, ("Q", "M"): 1.2, ("S", "H"): -0.4, ("H", "D"): -1.3,
+    ("Q", "I"): 1.4, ("S", "L"): 1.3, ("Y", "M"): -0.2, ("Y", "I"): 0.4,
+    ("Y", "E"): 0.2, ("Y", "A"): 0.2, ("G", "F"): -0.4, ("V", "T"): 1.6,
+    ("Y", "Y"): 2.2, ("V", "H"): -0.7, ("E", "C"): -1.5, ("Y", "Q"): 0.5,
+    ("V", "A"): 2.0, ("C", "C"): 2.2, ("V", "E"): 1.6, ("T", "N"): 1.1,
+    ("R", "K"): 2.1, ("P", "P"): 2.2, ("V", "I"): 2.2, ("R", "G"): 0.1,
+    ("V", "M"): 0.8, ("T", "F"): 0.3, ("R", "C"): -1.5, ("V", "Q"): 1.5,
+    ("K", "K"): 2.2, ("P", "D"): -1.2, ("I", "H"): -0.8, ("M", "F"): -0.2,
+    ("I", "D"): 0.0, ("K", "C"): -1.6, ("P", "L"): -0.8, ("K", "G"): -0.1,
+    ("P", "H"): -1.6, ("T", "R"): 1.9, ("F", "A"): 0.6, ("F", "E"): 0.6,
+    ("S", "S"): 2.2, ("W", "E"): -1.0, ("N", "N"): 2.2, ("W", "M"): -1.3,
+    ("Q", "C"): -1.4, ("N", "F"): 0.4, ("S", "C"): -1.3, ("L", "A"): 1.3,
+    ("S", "G"): 0.7, ("L", "E"): 0.9, ("W", "Q"): -1.0, ("H", "G"): -1.2,
+    ("S", "K"): 1.4, ("Q", "N"): 1.6, ("V", "D"): 0.0, ("H", "C"): -1.8,
+    ("Y", "N"): -0.1, ("Y", "F"): 2.0, ("W", "I"): -0.7, ("C", "A"): -1.5,
+    ("G", "E"): 0.3, ("G", "A"): 0.6, ("Y", "V"): 0.3, ("E", "D"): 1.0,
+    ("W", "H"): -1.7, ("Y", "R"): 0.8, ("N", "I"): 0.9, ("R", "L"): 1.2,
+    ("T", "I"): 1.6, ("Q", "L"): 1.1, ("R", "H"): -0.4, ("T", "M"): 0.8,
+    ("V", "F"): 0.8, ("R", "D"): -0.1, ("T", "A"): 1.9, ("T", "P"): -0.5,
+    ("T", "E"): 1.6, ("V", "N"): 1.1, ("P", "G"): -1.2, ("M", "A"): 1.0,
+    ("K", "H"): -1.0, ("V", "R"): 1.5, ("P", "C"): -1.8, ("M", "E"): 0.6,
+    ("V", "V"): 2.2, ("T", "T"): 2.2, ("M", "I"): 0.9, ("T", "Q"): 1.7,
+    ("I", "G"): 0.0, ("P", "K"): -0.7, ("M", "M"): 2.2, ("K", "D"): 0.1,
+    ("I", "C"): -1.6, ("L", "C"): -1.5, ("F", "F"): 2.2, ("D", "A"): 0.2,
+    ("S", "R"): 2.0, ("W", "D"): -1.4, ("N", "C"): -1.6, ("N", "G"): 0.2,
+    ("W", "T"): -1.0, ("Q", "G"): 0.2, ("S", "F"): 0.5, ("W", "P"): -1.6,
+    ("L", "D"): -0.2, ("H", "F"): -1.1, ("Q", "K"): 1.7, ("S", "N"): 1.9,
+    ("L", "L"): 2.2, ("Q", "F"): 0.7, ("Y", "K"): 0.5, ("Y", "G"): -0.2,
+    ("Y", "C"): -1.1, ("G", "D"): -0.4, ("E", "A"): 1.7, ("Y", "W"): -0.6,
+    ("E", "E"): 2.2, ("Y", "S"): 0.4, ("R", "M"): 1.1, ("V", "C"): -1.4,
+    ("T", "H"): -0.9, ("R", "I"): 1.4, ("V", "G"): 0.1, ("T", "L"): 1.2,
+    ("R", "E"): 1.9, ("V", "K"): 1.2, ("R", "Q"): 2.0, ("R", "A"): 1.5,
+    ("T", "D"): 0.0, ("P", "F"): -1.1, ("V", "S"): 1.8, ("K", "I"): 1.0,
+    ("M", "D"): -0.5, ("W", "W"): 2.2, ("M", "H"): -1.2, ("P", "N"): -1.0,
+    ("I", "F"): 1.0, ("K", "A"): 1.4, ("M", "L"): 1.8, ("K", "E"): 1.4,
+    ("N", "K"): 1.0, ("R", "P"): -0.3, ("L", "F"): 1.0, ("F", "C"): -1.6,
+    ("W", "G"): -1.3, ("W", "L"): -0.8, ("D", "D"): 2.2, ("N", "H"): -0.3,
+    ("S", "Q"): 1.8, ("Q", "P"): -0.6, ("N", "L"): 0.8, ("W", "K"): -1.1,
+    ("Q", "D"): 0.6, ("W", "N"): -1.1, ("S", "A"): 2.0, ("L", "G"): -0.2,
+    ("W", "S"): -0.8, ("S", "E"): 1.8, ("L", "K"): 0.7, ("H", "E"): -0.6,
+    ("S", "I"): 1.6, ("Q", "H"): -0.5, ("H", "A"): -0.6, ("S", "M"): 0.6,
+    ("Y", "L"): 0.5, ("Y", "H"): -0.8, ("Y", "D"): -0.4, ("G", "G"): 2.2,
+    ("G", "C"): -1.7, ("Y", "T"): 0.3, ("W", "C"): -1.8, ("Y", "P"): -1.2,
+    ("T", "K"): 1.2, ("R", "N"): 1.2, ("A", "A"): 2.2, ("W", "A"): -0.9,
+    ("T", "C"): -1.4, ("N", "D"): 0.8, ("R", "F"): 0.4, ("T", "G"): 0.2,
+    ("V", "L"): 2.0, ("V", "P"): -0.6, ("P", "E"): -0.1, ("M", "C"): -1.6,
+    ("I", "I"): 2.2, ("P", "A"): -0.2, ("M", "G"): -0.4, ("T", "S"): 2.1,
+    ("I", "E"): 1.5, ("P", "M"): -1.2, ("M", "K"): 0.4, ("K", "F"): 0.1,
+    ("I", "A"): 1.7, ("P", "I"): -0.6, ("R", "R"): 2.2, ("L", "I"): 2.1,
+    ("F", "D"): -0.3, ("D", "C"): -1.7
+}
+
+
+# http://www.embl-heidelberg.de/~vogt/matrices/str.cmp
+structure = {
+    ("W", "F"): 2, ("L", "R"): -3, ("I", "I"): 6, ("Q", "Q"): 6,
+    ("W", "N"): -5, ("V", "I"): 2, ("H", "T"): -2, ("H", "P"): -3,
+    ("W", "V"): -4, ("Q", "E"): 2, ("W", "R"): -2, ("Q", "A"): 0,
+    ("H", "H"): 8, ("H", "D"): 0, ("L", "N"): -3, ("Y", "M"): -1,
+    ("Y", "I"): -1, ("Y", "E"): -2, ("E", "S"): -1, ("Y", "A"): -3,
+    ("Y", "Y"): 7, ("T", "C"): -5, ("E", "C"): -3, ("Y", "Q"): -3,
+    ("E", "G"): -2, ("V", "A"): 0, ("C", "C"): 11, ("M", "R"): -4,
+    ("P", "T"): -1, ("V", "E"): -2, ("P", "P"): 7, ("I", "T"): -2,
+    ("K", "S"): -1, ("R", "G"): -2, ("I", "P"): -4, ("R", "C"): -2,
+    ("A", "T"): -1, ("K", "K"): 5, ("A", "P"): -1, ("V", "M"): 0,
+    ("I", "D"): -3, ("K", "C"): -4, ("K", "G"): -3, ("R", "S"): 0,
+    ("F", "Q"): -4, ("F", "A"): -3, ("V", "V"): 5, ("M", "N"): -2,
+    ("F", "E"): -4, ("D", "N"): 2, ("F", "I"): 1, ("F", "M"): 0,
+    ("M", "S"): -4, ("S", "S"): 4, ("L", "Q"): -3, ("W", "E"): -6,
+    ("W", "A"): -3, ("W", "M"): -2, ("H", "S"): -2, ("W", "I"): -2,
+    ("S", "C"): -4, ("L", "A"): -2, ("L", "E"): -4, ("W", "Q"): -5,
+    ("H", "G"): -3, ("Q", "N"): 0, ("H", "C"): -6, ("L", "M"): 3,
+    ("W", "Y"): 2, ("Y", "N"): -1, ("E", "P"): -1, ("Y", "F"): 3,
+    ("E", "T"): 0, ("A", "A"): 4, ("I", "N"): -3, ("G", "A"): 0,
+    ("Y", "V"): -1, ("E", "D"): 2, ("W", "H"): -3, ("Y", "R"): -1,
+    ("M", "Q"): 1, ("P", "S"): -1, ("R", "H"): 0, ("A", "C"): -2,
+    ("R", "D"): -2, ("K", "P"): -1, ("L", "D"): -6, ("K", "T"): 0,
+    ("V", "N"): -4, ("M", "A"): 0, ("K", "H"): 0, ("V", "R"): -3,
+    ("P", "C"): -8, ("M", "E"): -2, ("A", "S"): 0, ("T", "T"): 5,
+    ("R", "T"): -1, ("I", "G"): -5, ("R", "P"): -2, ("K", "D"): -1,
+    ("I", "C"): -4, ("F", "R"): -4, ("F", "V"): -1, ("L", "C"): -6,
+    ("F", "F"): 7, ("D", "A"): -1, ("F", "N"): -3, ("W", "D"): -6,
+    ("L", "P"): -3, ("Q", "S"): -1, ("N", "C"): -6, ("N", "G"): -1,
+    ("H", "N"): 2, ("W", "T"): -5, ("Q", "G"): -2, ("W", "P"): -4,
+    ("Q", "C"): -3, ("N", "S"): 0, ("L", "H"): -3, ("L", "L"): 5,
+    ("G", "T"): -3, ("M", "M"): 8, ("G", "P"): -2, ("Y", "K"): -2,
+    ("Y", "G"): -3, ("Y", "C"): -6, ("E", "A"): 0, ("E", "E"): 5,
+    ("Y", "S"): -2, ("M", "P"): -6, ("V", "C"): -4, ("M", "T"): -2,
+    ("V", "G"): -4, ("R", "E"): 0, ("V", "K"): -3, ("K", "Q"): 1,
+    ("R", "A"): -1, ("I", "R"): -3, ("N", "A"): -1, ("V", "S"): -3,
+    ("M", "D"): -4, ("M", "H"): -2, ("K", "A"): -1, ("R", "Q"): 1,
+    ("K", "E"): 1, ("F", "S"): -3, ("I", "K"): -3, ("D", "P"): -1,
+    ("D", "T"): -1, ("I", "M"): 1, ("F", "C"): -2, ("W", "L"): -1,
+    ("F", "G"): -6, ("F", "K"): -3, ("F", "T"): -3, ("D", "D"): 6,
+    ("Q", "T"): 0, ("W", "G"): -4, ("Q", "P"): -2, ("W", "C"): -6,
+    ("W", "K"): -3, ("H", "Q"): 0, ("Q", "D"): 0, ("W", "W"): 10,
+    ("V", "L"): 1, ("L", "G"): -5, ("W", "S"): -5, ("L", "K"): -2,
+    ("N", "P"): -2, ("H", "E"): -2, ("N", "T"): 0, ("H", "A"): -2,
+    ("Y", "L"): -2, ("Y", "H"): 0, ("G", "S"): -1, ("Y", "D"): -3,
+    ("V", "Q"): -2, ("L", "T"): -3, ("G", "G"): 5, ("G", "C"): -6,
+    ("E", "N"): 0, ("Y", "T"): -2, ("Y", "P"): -6, ("R", "N"): -1,
+    ("V", "D"): -4, ("K", "R"): 2, ("V", "H"): -2, ("I", "Q"): -5,
+    ("V", "P"): -4, ("M", "C"): -5, ("K", "N"): 0, ("V", "T"): -1,
+    ("M", "G"): -4, ("T", "S"): 1, ("I", "E"): -3, ("M", "K"): -1,
+    ("I", "A"): -2, ("N", "N"): 5, ("R", "R"): 7, ("F", "P"): -5,
+    ("L", "I"): 2, ("I", "S"): -3, ("D", "S"): 0, ("L", "S"): -4,
+    ("I", "H"): -5, ("F", "D"): -5, ("D", "C"): -7, ("F", "H"): -2,
+    ("D", "G"): -1, ("F", "L"): 2
+}
diff --git a/code/lib/Bio/SubsMat/__init__.py b/code/lib/Bio/SubsMat/__init__.py
new file mode 100644
index 0000000..17c1955
--- /dev/null
+++ b/code/lib/Bio/SubsMat/__init__.py
@@ -0,0 +1,600 @@
+# Copyright 2000-2009 by Iddo Friedberg.  All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+#
+# Iddo Friedberg idoerg@cc.huji.ac.il
+
+"""Substitution matrices, log odds matrices, and operations on them.
+
+General:
+--------
+
+This module provides a class and a few routines for generating
+substitution matrices, similar to BLOSUM or PAM matrices, but based on
+user-provided data. The class used for these matrices is SeqMat.
+
+Matrices are implemented as dictionaries. Each key is a 2-tuple holding
+the two residue/nucleotide types being replaced. The value depends on the
+matrix's purpose: e.g. in a log-odds frequency matrix, the value would be
+log(Pij/(Pi*Pj)) where:
+Pij: frequency of substitution of letter (residue/nucleotide) i by j
+Pi, Pj: expected frequencies of i and j, respectively.
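+For example (with illustrative numbers), if Pij = 0.02 and Pi = Pj = 0.1,
+the log-odds value for that pair is log(0.02 / (0.1 * 0.1)) = log(2).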
+
+Usage:
+------
+The following sections are laid out in the order in which most people
+will want to generate a log-odds matrix. Of course, the interim matrices
+can also be generated and inspected on their own.
+
+Generating an Accepted Replacement Matrix:
+------------------------------------------
+Initially, you should generate an accepted replacement matrix (ARM) from
+your data. The values in the ARM are the _counted_ numbers of
+replacements in your data, which could be a set of pairs or multiple
+alignments. So, for instance, if Alanine was replaced by Cysteine 10
+times, and Cysteine by Alanine 12 times, the corresponding ARM entries
+would be:
+('A','C'): 10,
+('C','A'): 12
+Since order doesn't matter, the user may instead provide a single
+combined entry:
+('A','C'): 22
+A SeqMat instance may be initialized with either a full (the first way of
+counting: 10, 12) or half (the latter way: 22) matrix. A full matrix over
+the 20-letter protein alphabet has 20x20 = 400 entries; the corresponding
+half matrix has 20*20/2 + 20/2 = 210 entries, since same-letter entries
+(the matrix diagonal) appear only once. Given an alphabet of size N:
+Full matrix size: N*N
+Half matrix size: N*(N+1)/2
+
+If you provide a full matrix, the constructor will create a half-matrix
+automatically. If you provide a half-matrix, make sure its keys are
+sorted (low, high): there should be an ('A','C') entry but never a
+('C','A') one.
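+
+A minimal sketch (the counts here are illustrative):
+
+    >>> from Bio.SubsMat import SeqMat
+    >>> counts = {("A", "A"): 30, ("A", "C"): 22, ("C", "C"): 40}
+    >>> arm = SeqMat(counts)  # half matrix over the alphabet "AC"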
+
+Internal functions:
+
+Generating the observed frequency matrix (OFM):
+-----------------------------------------------
+Use: OFM = _build_obs_freq_mat(ARM)
+The OFM is generated from the ARM; instead of replacement counts, it
+contains replacement frequencies.
+
+Generating an expected frequency matrix (EFM):
+----------------------------------------------
+Use: EFM = _build_exp_freq_mat(OFM,exp_freq_table)
+exp_freq_table: should be a FreqTable.FreqTable instance. See FreqTable.py
+for detailed information. Briefly, the expected frequency table holds the
+expected frequency of appearance of each member of the alphabet.
+
+Generating a substitution frequency matrix (SFM):
+-------------------------------------------------
+Use: SFM = _build_subs_mat(OFM,EFM)
+Accepts an OFM and an EFM, and returns the element-wise ratio of their
+corresponding values.
+
+Generating a log-odds matrix (LOM):
+-----------------------------------
+Use: LOM=_build_log_odds_mat(SFM[,logbase=10,factor=10.0,roundit=1])
+Accepts an SFM. logbase: base of the logarithm used to generate the
+log-odds values. factor: factor by which to multiply the log-odds values.
+roundit (default: true): whether to round the values. Each entry is
+computed as log(SFM[key]) * factor, and rounded if requested.
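+For example, with the defaults above an SFM entry of 2.0 becomes
+round(log10(2.0) * 10.0) = 3.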
+
+External:
+---------
+In most cases, users will want to generate a log-odds matrix only, without
+explicitly going through the OFM --> EFM --> SFM stages. The function
+make_log_odds_matrix does that: the user provides an ARM and an expected
+frequency table, and the function returns the log-odds matrix.
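+
+A minimal sketch, reusing ``arm`` from the example above (the expected
+frequencies here are illustrative, and make_log_odds_matrix is the public
+helper defined later in this module):
+
+    >>> from Bio.SubsMat import FreqTable, make_log_odds_matrix
+    >>> ftab = FreqTable.FreqTable({"A": 0.6, "C": 0.4}, FreqTable.FREQ)
+    >>> lom = make_log_odds_matrix(arm, ftab)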
+
+Methods for subtraction, addition and multiplication of matrices:
+-----------------------------------------------------------------
+
+* Generation of an expected frequency table from an observed frequency
+  matrix.
+* Calculation of linear correlation coefficient between two matrices.
+* Calculation of relative entropy is now done using the
+  _make_relative_entropy method and is stored in the member
+  self.relative_entropy
+* Calculation of entropy is now done using the _make_entropy method and
+  is stored in the member self.entropy.
+* Jensen-Shannon distance between the distributions from which the
+  matrices are derived. This is a distance function based on the
+  distribution's entropies.
+
+"""
+
+import re
+import sys
+import copy
+import math
+
+# BioPython imports
+from Bio.SubsMat import FreqTable
+from Bio import BiopythonDeprecationWarning
+
+import warnings
+
+warnings.warn(
+    "Bio.SubsMat has been deprecated, and we intend to remove it in a future "
+    "release of Biopython. As an alternative, please consider using "
+    "Bio.Align.substitution_matrices as a replacement, and contact the "
+    "Biopython developers if you still need the Bio.SubsMat module.",
+    BiopythonDeprecationWarning,
+)
+
+
+log = math.log
+# Matrix types
+NOTYPE = 0
+ACCREP = 1
+OBSFREQ = 2
+SUBS = 3
+EXPFREQ = 4
+LO = 5
+EPSILON = 0.00000000000001
+
+
+class SeqMat(dict):
+    """A Generic sequence matrix class.
+
+    The key is a 2-tuple containing the letter indices of the matrix. Those
+    should be sorted in the tuple (low, high). Because each matrix is dealt
+    with as a half-matrix.
+    """
+
+    def _alphabet_from_matrix(self):
+        """Set alphabet letters from the matrix entries (PRIVATE)."""
+        ab_set = set()
+        for i in self:
+            ab_set.add(i[0])
+            ab_set.add(i[1])
+        self.alphabet = "".join(sorted(ab_set))
+
+    def __init__(self, data=None, alphabet=None, mat_name="", build_later=0):
+        """Initialize.
+
+        User may supply:
+
+        - data: matrix itself
+        - mat_name: its name. See below.
+        - alphabet: an iterable over the letters allowed as indices into the
+          matrix. If not supplied, constructor builds its own from that matrix.
+        - build_later: skip the matrix size assertion. User will build the
+          matrix after creating the instance. Constructor builds a half matrix
+          filled with zeroes.
+
+        """
+        assert isinstance(mat_name, str)
+        # "data" may be:
+        # 1) None --> then self.data is an empty dictionary
+        # 2) type({}) --> then self takes the items in data
+        # 3) An instance of SeqMat
+        # This whole creation-during-execution is done to avoid mutable
+        # default values, which Python creates once when the function is
+        # defined, not each time it is called.
+        if data:
+            try:
+                self.update(data)
+            except ValueError:
+                raise ValueError("Failed to store data in a dictionary")
+
+        # If passed alphabet is empty, use the letters in the matrix itself
+        if alphabet is None:
+            self._alphabet_from_matrix()
+        else:
+            self.alphabet = "".join(alphabet)
+        # Assert matrix size: half or full
+        if not build_later:
+            N = len(self.alphabet)
+            assert len(self) == N ** 2 or len(self) == N * (N + 1) / 2
+        self.ab_list = list(self.alphabet)
+        self.ab_list.sort()
+        # Names: a string like "BLOSUM62" or "PAM250"
+        self.mat_name = mat_name
+        if build_later:
+            self._init_zero()
+        else:
+            # Convert full to half
+            self._full_to_half()
+            self._correct_matrix()
+        self.sum_letters = {}
+        self.relative_entropy = 0
+
+    def _correct_matrix(self):
+        """Sort key tuples (PRIVATE)."""
+        for key in list(self):  # iterate over a copy
+            if key[0] > key[1]:
+                self[(key[1], key[0])] = self[key]
+                del self[key]
+
+    def _full_to_half(self):
+        """Convert a full-matrix to a half-matrix (PRIVATE)."""
+        # For instance: two entries ('A','C'):13 and ('C','A'):20 will be summed
+        # into ('A','C'): 33 and the index ('C','A') will be deleted
+        # ('A','A') and ('C','C') will remain the same.
+
+        N = len(self.alphabet)
+        # Do nothing if this is already a half-matrix
+        if len(self) == N * (N + 1) / 2:
+            return
+        for i in self.ab_list:
+            for j in self.ab_list[: self.ab_list.index(i) + 1]:
+                if i != j:
+                    self[j, i] = self[j, i] + self[i, j]
+                    del self[i, j]
+
+    def _init_zero(self):
+        """Initialize the ab_list values to zero (PRIVATE)."""
+        for i in self.ab_list:
+            for j in self.ab_list[: self.ab_list.index(i) + 1]:
+                self[j, i] = 0.0
+
+    def make_entropy(self):
+        """Calculate and set the entropy attribute."""
+        self.entropy = 0
+        for i in self:
+            if self[i] > EPSILON:
+                self.entropy += self[i] * log(self[i]) / log(2)
+        self.entropy = -self.entropy
+
+    def sum(self):
+        """Return a dict mapping each letter to its summed frequency.
+
+        Diagonal entries count fully towards their letter; off-diagonal
+        entries are split evenly between their two letters.
+        """
+        result = {}
+        for letter in self.alphabet:
+            result[letter] = 0.0
+        for pair, value in self.items():
+            i1, i2 = pair
+            if i1 == i2:
+                result[i1] += value
+            else:
+                result[i1] += value / 2
+                result[i2] += value / 2
+        return result
+
+    def format(
+        self, fmt="%4d", letterfmt="%4s", alphabet=None, non_sym=None, full=False
+    ):
+        """Create a string with the bottom-half (default) or a full matrix.
+
+        User may pass own alphabet, which should contain all letters in the
+        alphabet of the matrix, but may be in a different order. This
+        order will be the order of the letters on the axes.
+        """
+        if not alphabet:
+            alphabet = self.ab_list
+        lines = []
+        assert non_sym is None or isinstance(non_sym, float) or isinstance(non_sym, int)
+        letterline = ""
+        for i in alphabet:
+            letterline += letterfmt % i
+        if full:
+            lines.append(letterline)
+        for i in alphabet:
+            line = i
+            flag = False
+            for j in alphabet:
+                if flag:
+                    val = non_sym
+                else:
+                    try:
+                        val = self[i, j]
+                    except KeyError:
+                        val = self[j, i]
+                if val <= -999:
+                    cur_str = "  ND"
+                else:
+                    cur_str = fmt % val
+                line += cur_str
+                if j == i:
+                    if not full:
+                        break
+                    if non_sym is not None:
+                        flag = True
+            lines.append(line)
+        if not full:
+            lines.append(letterline)
+        return "\n".join(lines)
+
+    def __str__(self):
+        """Print a nice half-matrix."""
+        return self.format()
+
+    def __sub__(self, other):
+        """Return integer subtraction product of the two matrices."""
+        mat_diff = 0
+        for i in self:
+            mat_diff += self[i] - other[i]
+        return mat_diff
+
+    def __mul__(self, other):
+        """Element-wise matrix multiplication.
+
+        Returns a new matrix created by multiplying each element by other (if
+        other is scalar), or by performing element-wise multiplication of the
+        two matrices (if other is a matrix of the same size).
+        """
+        new_mat = copy.copy(self)
+        try:  # first try and see if other is a matrix
+            for i in self:
+                new_mat[i] *= other[i]
+        except TypeError:  # other is a scalar value
+            for i in self:
+                new_mat[i] *= other
+        return new_mat
+
+    def __rmul__(self, other):
+        """Element-wise matrix multiplication.
+
+        Returns a new matrix created by multiplying each element by other (if
+        other is scalar), or by performing element-wise multiplication of the
+        two matrices (if other is a matrix of the same size).
+        """
+        return self.__mul__(other)
+
+    def __add__(self, other):
+        """Matrix addition."""
+        new_mat = copy.copy(self)
+        for i in self:
+            new_mat[i] += other[i]
+        return new_mat
+
+
+class SubstitutionMatrix(SeqMat):
+    """Substitution matrix."""
+
+    def calculate_relative_entropy(self, obs_freq_mat):
+        """Calculate and return relative entropy w.r.t. observed frequency matrix."""
+        relative_entropy = 0.0
+        for key, value in self.items():
+            if value > EPSILON:
+                relative_entropy += obs_freq_mat[key] * log(value)
+        relative_entropy /= log(2)
+        return relative_entropy
+
+
+class LogOddsMatrix(SeqMat):
+    """Log odds matrix."""
+
+    def calculate_relative_entropy(self, obs_freq_mat):
+        """Calculate and return relative entropy w.r.t. observed frequency matrix."""
+        relative_entropy = 0.0
+        for key, value in self.items():
+            relative_entropy += obs_freq_mat[key] * value / log(2)
+        return relative_entropy
+
+
+def _build_obs_freq_mat(acc_rep_mat):
+    """Build observed frequency matrix (PRIVATE).
+
+    Build the observed frequency matrix from an accepted replacement matrix.
+    The acc_rep_mat matrix should be generated by the user.
+    """
+    # Note: acc_rep_mat should already be a half matrix!
+    total = float(sum(acc_rep_mat.values()))
+    obs_freq_mat = SeqMat(alphabet=acc_rep_mat.alphabet, build_later=1)
+    for i in acc_rep_mat:
+        obs_freq_mat[i] = acc_rep_mat[i] / total
+    return obs_freq_mat
+
+
+def _exp_freq_table_from_obs_freq(obs_freq_mat):
+    """Build expected frequence table from observed frequences (PRIVATE)."""
+    exp_freq_table = {}
+    for i in obs_freq_mat.alphabet:
+        exp_freq_table[i] = 0.0
+    for i in obs_freq_mat:
+        if i[0] == i[1]:
+            exp_freq_table[i[0]] += obs_freq_mat[i]
+        else:
+            exp_freq_table[i[0]] += obs_freq_mat[i] / 2.0
+            exp_freq_table[i[1]] += obs_freq_mat[i] / 2.0
+    return FreqTable.FreqTable(exp_freq_table, FreqTable.FREQ)
+
+
+def _build_exp_freq_mat(exp_freq_table):
+    """Build an expected frequency matrix (PRIVATE).
+
+    exp_freq_table: should be a FreqTable instance
+    """
+    exp_freq_mat = SeqMat(alphabet=exp_freq_table.alphabet, build_later=1)
+    for i in exp_freq_mat:
+        if i[0] == i[1]:
+            exp_freq_mat[i] = exp_freq_table[i[0]] ** 2
+        else:
+            exp_freq_mat[i] = 2.0 * exp_freq_table[i[0]] * exp_freq_table[i[1]]
+    return exp_freq_mat
+
+
+#
+# Build the substitution matrix
+#
+def _build_subs_mat(obs_freq_mat, exp_freq_mat):
+    """Build the substitution matrix (PRIVATE)."""
+    if obs_freq_mat.ab_list != exp_freq_mat.ab_list:
+        raise ValueError("Alphabet mismatch in passed matrices")
+    subs_mat = SubstitutionMatrix(obs_freq_mat)
+    for i in obs_freq_mat:
+        subs_mat[i] = obs_freq_mat[i] / exp_freq_mat[i]
+    return subs_mat
+
+
+#
+# Build a log-odds matrix
+#
+def _build_log_odds_mat(subs_mat, logbase=2, factor=10.0, round_digit=0, keep_nd=0):
+    """Build a log-odds matrix (PRIVATE).
+
+    - logbase: base of the logarithm used to build the matrix (default 2)
+    - factor: a factor by which each matrix entry is multiplied (default 10.0)
+    - round_digit: decimal place to round to (default 0)
+    - keep_nd: if true, keeps the -999 marker for non-determined values (for
+      which there are no substitutions in the substitution frequency matrix).
+      If false, replaces entries containing -999 with the minimum log-odds
+      value of the matrix.
+
+    """
+    lo_mat = LogOddsMatrix(subs_mat)
+    for key, value in subs_mat.items():
+        if value < EPSILON:
+            lo_mat[key] = -999
+        else:
+            lo_mat[key] = round(factor * log(value) / log(logbase), round_digit)
+    mat_min = min(lo_mat.values())
+    if not keep_nd:
+        for i in lo_mat:
+            if lo_mat[i] <= -999:
+                lo_mat[i] = mat_min
+    return lo_mat
+
+
+#
+# External function. User provides an accepted replacement matrix, and,
+# optionally the following: expected frequency table, log base, mult. factor,
+# and rounding factor. Generates a log-odds matrix, calling internal SubsMat
+# functions.
+#
+def make_log_odds_matrix(
+    acc_rep_mat, exp_freq_table=None, logbase=2, factor=1.0, round_digit=9, keep_nd=0
+):
+    """Make log-odds matrix."""
+    obs_freq_mat = _build_obs_freq_mat(acc_rep_mat)
+    if not exp_freq_table:
+        exp_freq_table = _exp_freq_table_from_obs_freq(obs_freq_mat)
+    exp_freq_mat = _build_exp_freq_mat(exp_freq_table)
+    subs_mat = _build_subs_mat(obs_freq_mat, exp_freq_mat)
+    lo_mat = _build_log_odds_mat(subs_mat, logbase, factor, round_digit, keep_nd)
+    return lo_mat
+
+
+def observed_frequency_to_substitution_matrix(obs_freq_mat):
+    """Convert observed frequency table into substitution matrix."""
+    exp_freq_table = _exp_freq_table_from_obs_freq(obs_freq_mat)
+    exp_freq_mat = _build_exp_freq_mat(exp_freq_table)
+    subs_mat = _build_subs_mat(obs_freq_mat, exp_freq_mat)
+    return subs_mat
+
+
+def read_text_matrix(data_file):
+    """Read a matrix from a text file."""
+    matrix = {}
+    tmp = data_file.read().split("\n")
+    table = []
+    for i in tmp:
+        table.append(i.split())
+    # remove records beginning with ``#''
+    for rec in table[:]:
+        if rec.count("#") > 0:
+            table.remove(rec)
+
+    # remove null lists
+    while table.count([]) > 0:
+        table.remove([])
+    # build a dictionary
+    alphabet = table[0]
+    j = 0
+    for rec in table[1:]:
+        row = alphabet[j]
+        if re.compile(r"[A-Za-z\*]").match(rec[0]):
+            first_col = 1
+        else:
+            first_col = 0
+        i = 0
+        for field in rec[first_col:]:
+            col = alphabet[i]
+            matrix[(row, col)] = float(field)
+            i += 1
+        j += 1
+    # delete entries with an asterisk (iterate over a copy of the keys)
+    for i in list(matrix):
+        if "*" in i:
+            del matrix[i]
+    ret_mat = SeqMat(matrix)
+    return ret_mat
+
+
+diagNO = 1
+diagONLY = 2
+diagALL = 3
+
+
+def two_mat_relative_entropy(mat_1, mat_2, logbase=2, diag=diagALL):
+    """Return relative entropy of two matrices."""
+    rel_ent = 0.0
+    key_list_1 = sorted(mat_1)
+    key_list_2 = sorted(mat_2)
+    key_list = []
+    sum_ent_1 = 0.0
+    sum_ent_2 = 0.0
+    for i in key_list_1:
+        if i in key_list_2:
+            key_list.append(i)
+    if len(key_list_1) != len(key_list_2):
+        sys.stderr.write("Warning: the matrices have different numbers of entries\n")
+    if key_list_1 != key_list_2:
+        sys.stderr.write("Warning: indices differ between the two matrices\n")
+    for key in key_list:
+        if diag == diagNO and key[0] == key[1]:
+            continue
+        if diag == diagONLY and key[0] != key[1]:
+            continue
+        if mat_1[key] > EPSILON and mat_2[key] > EPSILON:
+            sum_ent_1 += mat_1[key]
+            sum_ent_2 += mat_2[key]
+
+    for key in key_list:
+        if diag == diagNO and key[0] == key[1]:
+            continue
+        if diag == diagONLY and key[0] != key[1]:
+            continue
+        if mat_1[key] > EPSILON and mat_2[key] > EPSILON:
+            val_1 = mat_1[key] / sum_ent_1
+            val_2 = mat_2[key] / sum_ent_2
+            rel_ent += val_1 * log(val_1 / val_2) / log(logbase)
+    return rel_ent
+
+
+def two_mat_correlation(mat_1, mat_2):
+    """Return linear correlation coefficient between two matrices."""
+    try:
+        import numpy
+    except ImportError:
+        raise ImportError(
+            "Please install Numerical Python (numpy) if you want to use this function"
+        )
+    values = []
+    assert mat_1.ab_list == mat_2.ab_list
+    for ab_pair in mat_1:
+        try:
+            values.append((mat_1[ab_pair], mat_2[ab_pair]))
+        except KeyError:
+            raise ValueError("%s is not a common key" % ab_pair)
+    correlation_matrix = numpy.corrcoef(values, rowvar=0)
+    correlation = correlation_matrix[0, 1]
+    return correlation
+
+
+def two_mat_DJS(mat_1, mat_2, pi_1=0.5, pi_2=0.5):
+    """Return Jensen-Shannon Distance between two observed frequence matrices."""
+    assert mat_1.ab_list == mat_2.ab_list
+    assert pi_1 > 0 and pi_2 > 0 and pi_1 < 1 and pi_2 < 1
+    assert abs(pi_1 + pi_2 - 1.0) < EPSILON
+    sum_mat = SeqMat(build_later=1)
+    sum_mat.ab_list = mat_1.ab_list
+    for i in mat_1:
+        sum_mat[i] = pi_1 * mat_1[i] + pi_2 * mat_2[i]
+    sum_mat.make_entropy()
+    mat_1.make_entropy()
+    mat_2.make_entropy()
+    dJS = sum_mat.entropy - pi_1 * mat_1.entropy - pi_2 * mat_2.entropy
+    return dJS
diff --git a/code/lib/Bio/SubsMat/__pycache__/FreqTable.cpython-37.pyc b/code/lib/Bio/SubsMat/__pycache__/FreqTable.cpython-37.pyc
new file mode 100644
index 0000000..3452a68
Binary files /dev/null and b/code/lib/Bio/SubsMat/__pycache__/FreqTable.cpython-37.pyc differ
diff --git a/code/lib/Bio/SubsMat/__pycache__/MatrixInfo.cpython-37.pyc b/code/lib/Bio/SubsMat/__pycache__/MatrixInfo.cpython-37.pyc
new file mode 100644
index 0000000..0571cff
Binary files /dev/null and b/code/lib/Bio/SubsMat/__pycache__/MatrixInfo.cpython-37.pyc differ
diff --git a/code/lib/Bio/SubsMat/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/SubsMat/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000..a992921
Binary files /dev/null and b/code/lib/Bio/SubsMat/__pycache__/__init__.cpython-37.pyc differ
diff --git a/code/lib/Bio/SwissProt/KeyWList.py b/code/lib/Bio/SwissProt/KeyWList.py
new file mode 100644
index 0000000..fa27d9a
--- /dev/null
+++ b/code/lib/Bio/SwissProt/KeyWList.py
@@ -0,0 +1,91 @@
+# Copyright 1999 by Jeffrey Chang.  All rights reserved.
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Code to parse the keywlist.txt file from SwissProt/UniProt.
+
+See:
+ - https://www.uniprot.org/docs/keywlist
+ - https://www.uniprot.org/docs/keywlist.txt
+
+Classes:
+ - Record            Stores the information about one keyword or one category
+   in the keywlist.txt file.
+
+Functions:
+ - parse             Parses the keywlist.txt file and returns an iterator to
+   the records it contains.
+
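+A minimal usage sketch (assuming a local copy of keywlist.txt downloaded
+from the URL above)::
+
+    from Bio.SwissProt import KeyWList
+
+    with open("keywlist.txt") as handle:
+        for record in KeyWList.parse(handle):
+            # Keyword entries carry "ID"; category entries carry "IC".
+            print(record.get("ID") or record.get("IC"), record["AC"])
+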
+"""
+
+
+class Record(dict):
+    """Store information of one keyword or category from the keywords list.
+
+    This record stores the information of one keyword or category in the
+    keywlist.txt as a Python dictionary. The keys in this dictionary are
+    the line codes that can appear in the keywlist.txt file::
+
+        ---------  ---------------------------     ----------------------
+        Line code  Content                         Occurrence in an entry
+        ---------  ---------------------------     ----------------------
+        ID         Identifier (keyword)            Once; starts a keyword entry
+        IC         Identifier (category)           Once; starts a category entry
+        AC         Accession (KW-xxxx)             Once
+        DE         Definition                      Once or more
+        SY         Synonyms                        Optional; once or more
+        GO         Gene ontology (GO) mapping      Optional; once or more
+        HI         Hierarchy                       Optional; once or more
+        WW         Relevant WWW site               Optional; once or more
+        CA         Category                        Once per keyword entry; absent
+                                                   in category entries
+
+    """
+
+    def __init__(self):
+        """Initialize the class."""
+        dict.__init__(self)
+        for keyword in ("DE", "SY", "GO", "HI", "WW"):
+            self[keyword] = []
+
+
+def parse(handle):
+    """Parse the keyword list from file handle.
+
+    Returns a generator object which yields keyword entries as
+    Bio.SwissProt.KeyWList.Record() objects.
+    """
+    record = Record()
+    # First, skip the header - look for start of a record
+    for line in handle:
+        if line.startswith("ID   "):
+            # Found the start of the first (keyword) record
+            record["ID"] = line[5:].strip()
+            break
+        if line.startswith("IC   "):
+            # Found the start of the first (category) record
+            record["IC"] = line[5:].strip()
+            break
+    # Now parse the records
+    for line in handle:
+        if line.startswith("-------------------------------------"):
+            # We have reached the footer
+            break
+        key = line[:2]
+        if key == "//":
+            record["DE"] = " ".join(record["DE"])
+            record["SY"] = " ".join(record["SY"])
+            yield record
+            record = Record()
+        elif line[2:5] == "   ":
+            value = line[5:].strip()
+            if key in ("ID", "IC", "AC", "CA"):
+                record[key] = value
+            elif key in ("DE", "SY", "GO", "HI", "WW"):
+                record[key].append(value)
+            else:
+                print("Ignoring: %s" % line.strip())
+    # Read the footer and throw it away
+    for line in handle:
+        pass
diff --git a/code/lib/Bio/SwissProt/__init__.py b/code/lib/Bio/SwissProt/__init__.py
new file mode 100644
index 0000000..a311ce0
--- /dev/null
+++ b/code/lib/Bio/SwissProt/__init__.py
@@ -0,0 +1,861 @@
+# Copyright 2007 by Michiel de Hoon.  All rights reserved.
+# This code is part of the Biopython distribution and governed by its
+# license.  Please see the LICENSE file that should have been included
+# as part of this package.
+"""Code to work with the sprotXX.dat file from SwissProt.
+
+https://web.expasy.org/docs/userman.html
+
+Classes:
+ - Record             Holds SwissProt data.
+ - Reference          Holds reference data from a SwissProt record.
+
+Functions:
+ - read               Read one SwissProt record
+ - parse              Read multiple SwissProt records
+
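+A minimal sketch for reading a single record (the file name is
+hypothetical)::
+
+    from Bio import SwissProt
+
+    record = SwissProt.read("P0A7V8.txt")
+    print(record.entry_name, record.sequence_length)
+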
+"""
+
+
+import io
+
+from Bio.SeqFeature import (
+    SeqFeature,
+    FeatureLocation,
+    ExactPosition,
+    BeforePosition,
+    AfterPosition,
+    UncertainPosition,
+    UnknownPosition,
+)
+
+
+class SwissProtParserError(ValueError):
+    """An error occurred while parsing a SwissProt file."""
+
+    def __init__(self, *args, line=None):
+        """Create a SwissProtParserError object with the offending line."""
+        super().__init__(*args)
+        self.line = line
+
+
+class Record:
+    """Holds information from a SwissProt record.
+
+    Attributes:
+     - entry_name        Name of this entry, e.g. RL1_ECOLI.
+     - data_class        Either 'STANDARD' or 'PRELIMINARY'.
+     - molecule_type     Type of molecule, 'PRT' (None in newer files).
+     - sequence_length   Number of residues.
+     - accessions        List of the accession numbers, e.g. ['P00321']
+     - created           A tuple of (date, release).
+     - sequence_update   A tuple of (date, release).
+     - annotation_update A tuple of (date, release).
+     - description       Free-format description.
+     - gene_name         Gene name.  See userman.txt for description.
+     - organism          The source of the sequence.
+     - organelle         The origin of the sequence.
+     - organism_classification  The taxonomy classification.  List of strings.
+       (http://www.ncbi.nlm.nih.gov/Taxonomy/)
+     - taxonomy_id       A list of NCBI taxonomy id's.
+     - host_organism     A list of names of the hosts of a virus, if any.
+     - host_taxonomy_id  A list of NCBI taxonomy id's of the hosts, if any.
+     - references        List of Reference objects.
+     - comments          List of strings.
+     - cross_references  List of tuples (db, id1[, id2][, id3]).  See the docs.
+     - keywords          List of the keywords.
+     - features          List of tuples (key name, from, to, description).
+       from and to can be either integers for the residue
+       numbers, '<', '>', or '?'
+     - protein_existence Numerical value describing the evidence for the existence of the protein.
+     - seqinfo           tuple of (length, molecular weight, CRC32 value)
+     - sequence          The sequence.
+
+    Examples
+    --------
+    >>> from Bio import SwissProt
+    >>> example_filename = "SwissProt/sp008"
+    >>> with open(example_filename) as handle:
+    ...     records = SwissProt.parse(handle)
+    ...     for record in records:
+    ...         print(record.entry_name)
+    ...         print(",".join(record.accessions))
+    ...         print(record.keywords)
+    ...         print(repr(record.organism))
+    ...         print(record.sequence[:20] + "...")
+    ...
+    1A02_HUMAN
+    P01892,P06338,P30514,P30444,P30445,P30446,Q29680,Q29899,Q95352,Q29837,Q95380
+    ['MHC I', 'Transmembrane', 'Glycoprotein', 'Signal', 'Polymorphism', '3D-structure']
+    'Homo sapiens (Human).'
+    MAVMAPRTLVLLLSGALALT...
+
+    """
+
+    def __init__(self):
+        """Initialize the class."""
+        self.entry_name = None
+        self.data_class = None
+        self.molecule_type = None
+        self.sequence_length = None
+
+        self.accessions = []
+        self.created = None
+        self.sequence_update = None
+        self.annotation_update = None
+
+        self.description = []
+        self.gene_name = ""
+        self.organism = []
+        self.organelle = ""
+        self.organism_classification = []
+        self.taxonomy_id = []
+        self.host_organism = []
+        self.host_taxonomy_id = []
+        self.references = []
+        self.comments = []
+        self.cross_references = []
+        self.keywords = []
+        self.features = []
+        self.protein_existence = ""
+
+        self.seqinfo = None
+        self.sequence = ""
+
+
+class Reference:
+    """Holds information from one reference in a SwissProt entry.
+
+    Attributes:
+     - number      Number of reference in an entry.
+     - evidence    Evidence code.  List of strings.
+     - positions   Describes extent of work.  List of strings.
+     - comments    Comments.  List of (token, text).
+     - references  References.  List of (dbname, identifier).
+     - authors     The authors of the work.
+     - title       Title of the work.
+     - location    A citation for the work.
+
+    """
+
+    def __init__(self):
+        """Initialize the class."""
+        self.number = None
+        self.positions = []
+        self.comments = []
+        self.references = []
+        self.authors = []
+        self.title = []
+        self.location = []
+
+
+class FeatureTable(SeqFeature):
+    """Stores feature annotations for specific regions of the sequence.
+
+    This is a subclass of SeqFeature, defined in Bio.SeqFeature, where the
+    attributes are used as follows:
+
+     - ``location``: location of the feature on the canonical or isoform
+       sequence; the location is stored as an instance of FeatureLocation,
+       defined in Bio.SeqFeature, with the ref attribute set to the isoform
+       ID referring to the canonical or isoform sequence on which the feature
+       is defined
+     - ``id``: unique and stable identifier (FTId), only provided for features
+       belonging to the types CARBOHYD, CHAIN, PEPTIDE, PROPEP, VARIANT, or
+       VAR_SEQ
+     - ``type``: indicates the type of feature, as defined by the UniProt
+       Knowledgebase documentation:
+
+        - ACT_SITE: amino acid(s) involved in the activity of an enzyme
+        - BINDING:  binding site for any chemical group
+        - CARBOHYD: glycosylation site; an FTId identifier to the GlyConnect
+          database is provided if annotated there
+        - CA_BIND:  calcium-binding region
+        - CHAIN:    polypeptide chain in the mature protein
+        - COILED:   coiled-coil region
+        - COMPBIAS: compositionally biased region
+        - CONFLICT: different sources report differing sequences
+        - CROSSLNK: posttranslationally formed amino acid bond
+        - DISULFID: disulfide bond
+        - DNA_BIND: DNA-binding region
+        - DOMAIN:   domain, defined as a specific combination of secondary
+          structures organized into a characteristic three-dimensional
+          structure or fold
+        - INIT_MET: initiator methionine
+        - INTRAMEM: region located in a membrane without crossing it
+        - HELIX:    alpha-, 3(10)-, or pi-helix secondary structure
+        - LIPID:    covalent binding of a lipid moiety
+        - METAL:    binding site for a metal ion
+        - MOD_RES:  posttranslational modification (PTM) of a residue,
+          annotated by the controlled vocabulary defined by the ptmlist.txt
+          document on the UniProt website
+        - MOTIF:    short sequence motif of biological interest
+        - MUTAGEN:  site experimentally altered by mutagenesis
+        - NON_CONS: non-consecutive residues
+        - NON_STD:  non-standard amino acid
+        - NON_TER:  the residue at an extremity of the sequence is not the
+          terminal residue
+        - NP_BIND:  nucleotide phosphate-binding region
+        - PEPTIDE:  released active mature polypeptide
+        - PROPEP:   any processed propeptide
+        - REGION:   region of interest in the sequence
+        - REPEAT:   internal sequence repetition
+        - SIGNAL:   signal sequence (prepeptide)
+        - SITE:     amino-acid site of interest not represented by another
+          feature key
+        - STRAND:   beta-strand secondary structure; either a hydrogen-bonded
+          extended beta strand or a residue in an isolated beta-bridge
+        - TOPO_DOM: topological domain
+        - TRANSIT:  transit peptide (mitochondrion, chloroplast, thylakoid,
+          cyanelle, peroxisome, etc.)
+        - TRANSMEM: transmembrane region
+        - TURN:     H-bonded turn (3-, 4-, or 5-turn)
+        - UNSURE:   uncertainties in the sequence
+        - VARIANT:  sequence variant; an FTId is provided for protein sequence
+          variants of Hominidae (great apes and humans)
+        - VAR_SEQ:  sequence variant produced by alternative splicing,
+          alternative promoter usage, alternative initiation, or ribosomal
+          frameshifting
+        - ZN_FING:  zinc finger region
+
+     - qualifiers   A dictionary of additional information, which may include
+       the feature evidence and free-text notes. While SwissProt includes the
+       feature identifier code (FTId) as a qualifier, it is stored as the
+       attribute ID of the FeatureTable object.
+
+    """
+
+
+def parse(source):
+    """Read multiple SwissProt records from file.
+
+    Argument source is a file-like object or a path to a file.
+
+    Returns a generator object which yields Bio.SwissProt.Record() objects.
+    """
+    handle = _open(source)
+    try:
+        while True:
+            record = _read(handle)
+            if not record:
+                return
+            yield record
+    finally:
+        if handle is not source:
+            handle.close()
+
+
+def read(source):
+    """Read one SwissProt record from file.
+
+    Argument source is a file-like object or a path to a file.
+
+    Returns a Record() object.
+    """
+    handle = _open(source)
+    try:
+        record = _read(handle)
+        if not record:
+            raise ValueError("No SwissProt record found")
+        # We should have reached the end of the record by now.
+        # Try to read one more line to be sure:
+        try:
+            next(handle)
+        except StopIteration:
+            return record
+        raise ValueError("More than one SwissProt record found")
+    finally:
+        if handle is not source:
+            handle.close()
+
+
+# Everything below is considered private
+
+
+def _open(source):
+    try:
+        handle = open(source)
+        return handle
+    except TypeError:
+        handle = source
+        if handle.read(0) == "":
+            # handle is text; assume the encoding is compatible with ASCII
+            return handle
+        # handle is binary; SwissProt encoding is always ASCII
+        return io.TextIOWrapper(handle, encoding="ASCII")
+
+
+def _read(handle):
+    record = None
+    unread = ""
+    try:
+        line = next(handle)
+    except StopIteration:
+        return record
+    key, value = line[:2], line[5:].rstrip()
+    if key != "ID":
+        raise SwissProtParserError("Failed to find ID in first line", line=line)
+    record = Record()
+    _read_id(record, line)
+    _sequence_lines = []
+    for line in handle:
+        key, value = line[:2], line[5:].rstrip()
+        if unread:
+            value = unread + " " + value
+            unread = ""
+        if key == "AC":
+            accessions = value.rstrip(";").split("; ")
+            record.accessions.extend(accessions)
+        elif key == "DT":
+            _read_dt(record, line)
+        elif key == "DE":
+            record.description.append(value.strip())
+        elif key == "GN":
+            if record.gene_name:
+                record.gene_name += " "
+            record.gene_name += value
+        elif key == "OS":
+            record.organism.append(value)
+        elif key == "OG":
+            record.organelle += line[5:]
+        elif key == "OC":
+            cols = value.rstrip(";.").split("; ")
+            record.organism_classification.extend(cols)
+        elif key == "OX":
+            _read_ox(record, line)
+        elif key == "OH":
+            _read_oh(record, line)
+        elif key == "RN":
+            reference = Reference()
+            _read_rn(reference, value)
+            record.references.append(reference)
+        elif key == "RP":
+            assert record.references, "RP: missing RN"
+            record.references[-1].positions.append(value)
+        elif key == "RC":
+            assert record.references, "RC: missing RN"
+            reference = record.references[-1]
+            unread = _read_rc(reference, value)
+        elif key == "RX":
+            assert record.references, "RX: missing RN"
+            reference = record.references[-1]
+            _read_rx(reference, value)
+        elif key == "RL":
+            assert record.references, "RL: missing RN"
+            reference = record.references[-1]
+            reference.location.append(value)
+        # In UniProt release 1.12 of 6/21/04, there is a new RG
+        # (Reference Group) line, which references a group instead of
+        # an author.  Each block must have at least 1 RA or RG line.
+        elif key == "RA":
+            assert record.references, "RA: missing RN"
+            reference = record.references[-1]
+            reference.authors.append(value)
+        elif key == "RG":
+            assert record.references, "RG: missing RN"
+            reference = record.references[-1]
+            reference.authors.append(value)
+        elif key == "RT":
+            assert record.references, "RT: missing RN"
+            reference = record.references[-1]
+            reference.title.append(value)
+        elif key == "CC":
+            _read_cc(record, line)
+        elif key == "DR":
+            _read_dr(record, value)
+        elif key == "PE":
+            _read_pe(record, value)
+        elif key == "KW":
+            _read_kw(record, value)
+        elif key == "FT":
+            _read_ft(record, line)
+        elif key == "SQ":
+            cols = value.split()
+            assert len(cols) == 7, "I don't understand SQ line %s" % line
+            # Do more checking here?
+            record.seqinfo = int(cols[1]), int(cols[3]), cols[5]
+        elif key == "  ":
+            _sequence_lines.append(value.replace(" ", "").rstrip())
+        elif key == "//":
+            # Join multiline data into one string
+            record.description = " ".join(record.description)
+            record.organism = " ".join(record.organism)
+            record.organelle = record.organelle.rstrip()
+            for reference in record.references:
+                reference.authors = " ".join(reference.authors).rstrip(";")
+                if reference.title:
+                    title = reference.title[0]
+                    for fragment in reference.title[1:]:
+                        if not title.endswith("-"):
+                            title += " "
+                        title += fragment
+                    title = title.rstrip(";")
+                    if title.startswith('"') and title.endswith('"'):
+                        title = title[1:-1]  # remove quotes
+                else:
+                    title = ""
+                reference.title = title
+                reference.location = " ".join(reference.location)
+            record.sequence = "".join(_sequence_lines)
+            return record
+        elif key == "**":
+            # Do this one last, as it will almost never occur.
+            # See Bug 2353, some files from the EBI have extra lines
+            # starting "**" (two asterisks/stars).  They appear
+            # to be unofficial automated annotations. e.g.
+            # **
+            # **   #################    INTERNAL SECTION    ##################
+            # **HA SAM; Annotated by PicoHamap 1.88; MF_01138.1; 09-NOV-2003.
+            pass
+        else:
+            raise SwissProtParserError("Unknown keyword '%s' found" % key, line=line)
+    if record:
+        raise ValueError("Unexpected end of stream.")
+
+
+def _read_id(record, line):
+    cols = line[5:].split()
+    # Prior to release 51, included with MoleculeType:
+    # ID   EntryName DataClass; MoleculeType; SequenceLength AA.
+    #
+    # Newer files lack the MoleculeType:
+    # ID   EntryName DataClass; SequenceLength AA.
+    if len(cols) == 5:
+        record.entry_name = cols[0]
+        record.data_class = cols[1].rstrip(";")
+        record.molecule_type = cols[2].rstrip(";")
+        record.sequence_length = int(cols[3])
+    elif len(cols) == 4:
+        record.entry_name = cols[0]
+        record.data_class = cols[1].rstrip(";")
+        record.molecule_type = None
+        record.sequence_length = int(cols[2])
+    else:
+        raise SwissProtParserError("ID line has unrecognised format", line=line)
+    # check if the data class is one of the allowed values
+    allowed = ("STANDARD", "PRELIMINARY", "IPI", "Reviewed", "Unreviewed")
+    if record.data_class not in allowed:
+        message = "Unrecognized data class '%s'" % record.data_class
+        raise SwissProtParserError(message, line=line)
+
+    # molecule_type should be 'PRT' for PRoTein
+    # Note that has been removed in recent releases (set to None)
+    if record.molecule_type not in (None, "PRT"):
+        message = "Unrecognized molecule type '%s'" % record.molecule_type
+        raise SwissProtParserError(message, line=line)
+
+
+def _read_dt(record, line):
+    value = line[5:]
+    uprline = value.upper()
+    cols = value.rstrip().split()
+    if (
+        "CREATED" in uprline
+        or "LAST SEQUENCE UPDATE" in uprline
+        or "LAST ANNOTATION UPDATE" in uprline
+    ):
+        # Old style DT line
+        # =================
+        # e.g.
+        # DT   01-FEB-1995 (Rel. 31, Created)
+        # DT   01-FEB-1995 (Rel. 31, Last sequence update)
+        # DT   01-OCT-2000 (Rel. 40, Last annotation update)
+        #
+        # or:
+        # DT   08-JAN-2002 (IPI Human rel. 2.3, Created)
+        # ...
+
+        # find where the version information will be located
+        # This is needed for when you have cases like IPI where
+        # the release version is in a different spot:
+        # DT   08-JAN-2002 (IPI Human rel. 2.3, Created)
+        uprcols = uprline.split()
+        rel_index = -1
+        for index in range(len(uprcols)):
+            if "REL." in uprcols[index]:
+                rel_index = index
+        assert rel_index >= 0, "Could not find Rel. in DT line: %s" % line
+        version_index = rel_index + 1
+        # get the version information
+        str_version = cols[version_index].rstrip(",")
+        # no version number
+        if str_version == "":
+            version = 0
+        # dot versioned
+        elif "." in str_version:
+            version = str_version
+        # integer versioned
+        else:
+            version = int(str_version)
+        date = cols[0]
+
+        if "CREATED" in uprline:
+            record.created = date, version
+        elif "LAST SEQUENCE UPDATE" in uprline:
+            record.sequence_update = date, version
+        elif "LAST ANNOTATION UPDATE" in uprline:
+            record.annotation_update = date, version
+        else:
+            raise SwissProtParserError("Unrecognised DT (DaTe) line", line=line)
+    elif (
+        "INTEGRATED INTO" in uprline
+        or "SEQUENCE VERSION" in uprline
+        or "ENTRY VERSION" in uprline
+    ):
+        # New style DT line
+        # =================
+        # As of UniProt Knowledgebase release 7.0 (including
+        # Swiss-Prot release 49.0 and TrEMBL release 32.0) the
+        # format of the DT lines and the version information
+        # in them was changed - the release number was dropped.
+        #
+        # For more information see bug 1948 and
+        # http://ca.expasy.org/sprot/relnotes/sp_news.html#rel7.0
+        #
+        # e.g.
+        # DT   01-JAN-1998, integrated into UniProtKB/Swiss-Prot.
+        # DT   15-OCT-2001, sequence version 3.
+        # DT   01-APR-2004, entry version 14.
+        #
+        # This is a new style DT line...
+
+        # The date should be in string cols[1]
+        # Get the version number if there is one.
+        # For the three DT lines above: 0, 3, 14
+        try:
+            version = 0
+            for s in cols[-1].split("."):
+                if s.isdigit():
+                    version = int(s)
+        except ValueError:
+            version = 0
+        date = cols[0].rstrip(",")
+
+        # Re-use the historical property names, even though
+        # the meaning has changed slightly:
+        if "INTEGRATED" in uprline:
+            record.created = date, version
+        elif "SEQUENCE VERSION" in uprline:
+            record.sequence_update = date, version
+        elif "ENTRY VERSION" in uprline:
+            record.annotation_update = date, version
+        else:
+            raise SwissProtParserError("Unrecognised DT (DaTe) line", line=line)
+    else:
+        raise SwissProtParserError("Failed to parse DT (DaTe) line", line=line)
+
+
+def _read_ox(record, line):
+    # The OX line used to be in the simple format:
+    # OX   DESCRIPTION=ID[, ID]...;
+    # If there are too many id's to fit onto a line, then the ID's
+    # continue directly onto the next line, e.g.
+    # OX   DESCRIPTION=ID[, ID]...
+    # OX   ID[, ID]...;
+    # Currently, the description is always "NCBI_TaxID".
+    # To parse this, I need to check to see whether I'm at the
+    # first line.  If I am, grab the description and make sure
+    # it's an NCBI ID.  Then, grab all the id's.
+    #
+    # As of the 2014-10-01 release, there may be an evidence code, e.g.
+    # OX   NCBI_TaxID=418404 {ECO:0000313|EMBL:AEX14553.1};
+    # In the short term, we will ignore any evidence codes:
+    line = line.split("{")[0]
+    if record.taxonomy_id:
+        ids = line[5:].rstrip().rstrip(";")
+    else:
+        descr, ids = line[5:].rstrip().rstrip(";").split("=")
+        assert descr == "NCBI_TaxID", "Unexpected taxonomy type %s" % descr
+    record.taxonomy_id.extend(ids.split(", "))
+
+
+def _read_oh(record, line):
+    # Line type OH (Organism Host) for viral hosts
+    assert line[5:].startswith("NCBI_TaxID="), "Unexpected %s" % line
+    line = line[16:].rstrip()
+    assert line[-1] == "." and line.count(";") == 1, line
+    taxid, name = line[:-1].split(";")
+    record.host_taxonomy_id.append(taxid.strip())
+    record.host_organism.append(name.strip())
+
+
+def _read_rn(reference, rn):
+    # This used to be a very simple line with a reference number, e.g.
+    # RN   [1]
+    # As of the 2014-10-01 release, there may be an evidence code, e.g.
+    # RN   [1] {ECO:0000313|EMBL:AEX14553.1}
+    words = rn.split(None, 1)
+    number = words[0]
+    assert number.startswith("[") and number.endswith("]"), (
+        "Missing brackets %s" % number
+    )
+    reference.number = int(number[1:-1])
+    if len(words) > 1:
+        evidence = words[1]
+        assert evidence.startswith("{") and evidence.endswith("}"), (
+            "Missing braces %s" % evidence
+        )
+        reference.evidence = evidence[1:-1].split("|")
+
+
+def _read_rc(reference, value):
+    cols = value.split(";")
+    if value[-1] == ";":
+        unread = ""
+    else:
+        cols, unread = cols[:-1], cols[-1]
+    for col in cols:
+        if not col:  # last column will be the empty string
+            return
+        # The token is everything before the first '=' character.
+        i = col.find("=")
+        if i >= 0:
+            token, text = col[:i], col[i + 1 :]
+            comment = token.lstrip(), text
+            reference.comments.append(comment)
+        else:
+            # Continuation of the previous comment: append to its text part.
+            token, text = reference.comments[-1]
+            reference.comments[-1] = (token, "%s %s" % (text, col))
+    return unread
+
+
+def _read_rx(reference, value):
+    # The basic (older?) RX line is of the form:
+    # RX   MEDLINE; 85132727.
+    # but there are variants of this that need to be dealt with (see below)
+
+    # CLD1_HUMAN in Release 39 and DADR_DIDMA in Release 33
+    # have extraneous information in the RX line.  Check for
+    # this and chop it out of the line.
+    # (noticed by katel@worldpath.net)
+    value = value.replace(" [NCBI, ExPASy, Israel, Japan]", "")
+
+    # RX lines can also be used of the form
+    # RX   PubMed=9603189;
+    # reported by edvard@farmasi.uit.no
+    # and these can be more complicated like:
+    # RX   MEDLINE=95385798; PubMed=7656980;
+    # RX   PubMed=15060122; DOI=10.1136/jmg 2003.012781;
+    # We look for these cases first and deal with them
+    warn = False
+    if "=" in value:
+        cols = value.split("; ")
+        cols = [x.strip() for x in cols]
+        cols = [x for x in cols if x]
+        for col in cols:
+            x = col.split("=")
+            if len(x) != 2 or x == ["DOI", "DOI"]:
+                warn = True
+                break
+            assert len(x) == 2, "I don't understand RX line %s" % value
+            reference.references.append((x[0], x[1].rstrip(";")))
+    # otherwise we assume we have the type 'RX   MEDLINE; 85132727.'
+    else:
+        cols = value.split("; ")
+        # normally we split into two parts
+        if len(cols) != 2:
+            warn = True
+        else:
+            reference.references.append((cols[0].rstrip(";"), cols[1].rstrip(".")))
+    if warn:
+        import warnings
+        from Bio import BiopythonParserWarning
+
+        warnings.warn("Possibly corrupt RX line %r" % value, BiopythonParserWarning)
+
+
+def _read_cc(record, line):
+    key, value = line[5:8], line[9:].rstrip()
+    if key == "-!-":  # Make a new comment
+        record.comments.append(value)
+    elif key == "   ":  # add to the previous comment
+        if not record.comments:
+            # TCMO_STRGA in Release 37 has comment with no topic
+            record.comments.append(value)
+        else:
+            record.comments[-1] += " " + value
+
+
+def _read_dr(record, value):
+    cols = value.rstrip(".").split("; ")
+    record.cross_references.append(tuple(cols))
+
+
+def _read_pe(record, value):
+    pe = value.split(":")
+    record.protein_existence = int(pe[0])
+
+
+def _read_kw(record, value):
+    # Old style - semi-colon separated, multi-line. e.g. Q13639.txt
+    # KW   Alternative splicing; Cell membrane; Complete proteome;
+    # KW   Disulfide bond; Endosome; G-protein coupled receptor; Glycoprotein;
+    # KW   Lipoprotein; Membrane; Palmitate; Polymorphism; Receptor; Transducer;
+    # KW   Transmembrane.
+    #
+    # New style as of 2014-10-01 release with evidence codes, e.g. H2CNN8.txt
+    # KW   Monooxygenase {ECO:0000313|EMBL:AEX14553.1};
+    # KW   Oxidoreductase {ECO:0000313|EMBL:AEX14553.1}.
+    # For now to match the XML parser, drop the evidence codes.
+    for value in value.rstrip(";.").split("; "):
+        if value.endswith("}"):
+            # Discard the evidence code
+            value = value.rsplit("{", 1)[0]
+        record.keywords.append(value.strip())
+
+
+def _read_ft(record, line):
+    name = line[5:13].rstrip()
+    if name:
+        if line[13:21] == "        ":  # new-style FT line
+            location = line[21:80].rstrip()
+            try:
+                isoform_id, location = location.split(":")
+            except ValueError:
+                isoform_id = None
+            try:
+                from_res, to_res = location.split("..")
+            except ValueError:
+                from_res = location
+                to_res = ""
+            qualifiers = {}
+        else:  # old-style FT line
+            from_res = line[14:20].lstrip()
+            to_res = line[21:27].lstrip()
+            isoform_id = None
+            description = line[34:75].rstrip()
+            qualifiers = {"description": description}
+        if from_res == "?":
+            from_res = UnknownPosition()
+        elif from_res.startswith("?"):
+            position = int(from_res[1:]) - 1  # Python zero-based counting
+            from_res = UncertainPosition(position)
+        elif from_res.startswith("<"):
+            position = int(from_res[1:]) - 1  # Python zero-based counting
+            from_res = BeforePosition(position)
+        else:
+            position = int(from_res) - 1  # Python zero-based counting
+            from_res = ExactPosition(position)
+        if to_res == "":
+            position = from_res + 1
+            to_res = ExactPosition(position)
+        elif to_res == "?":
+            to_res = UnknownPosition()
+        elif to_res.startswith("?"):
+            position = int(to_res[1:])
+            to_res = UncertainPosition(position)
+        elif to_res.startswith(">"):
+            position = int(to_res[1:])
+            to_res = AfterPosition(position)
+        else:
+            position = int(to_res)
+            to_res = ExactPosition(position)
+        location = FeatureLocation(from_res, to_res, ref=isoform_id)
+        feature = FeatureTable(
+            location=location, type=name, id=None, qualifiers=qualifiers
+        )
+        record.features.append(feature)
+        return
+    # this line is a continuation of the previous feature
+    feature = record.features[-1]
+    if line[5:34] == "                             ":  # old-style FT line
+        description = line[34:75].rstrip()
+        if description.startswith("/FTId="):
+            # store the FTId as the feature ID
+            feature.id = description[6:].rstrip(".")
+            return
+        # this line is a continuation of the description of the previous feature
+        old_description = feature.qualifiers["description"]
+        if old_description.endswith("-"):
+            description = "%s%s" % (old_description, description)
+        else:
+            description = "%s %s" % (old_description, description)
+
+        if feature.type in ("VARSPLIC", "VAR_SEQ"):  # special case
+            # Remove unwanted spaces in sequences.
+            # During line carryover, the sequences in VARSPLIC/VAR_SEQ can get
+            # mangled with unwanted spaces like:
+            # 'DISSTKLQALPSHGLESIQT -> PCRATGWSPFRRSSPC LPTH'
+            # We want to check for this case and correct it as it happens.
+            try:
+                first_seq, second_seq = description.split(" -> ")
+            except ValueError:
+                pass
+            else:
+                extra_info = ""
+                # we might have more information at the end of the
+                # second sequence, which should be in parenthesis
+                extra_info_pos = second_seq.find(" (")
+                if extra_info_pos != -1:
+                    extra_info = second_seq[extra_info_pos:]
+                    second_seq = second_seq[:extra_info_pos]
+                # now clean spaces out of the first and second string
+                first_seq = first_seq.replace(" ", "")
+                second_seq = second_seq.replace(" ", "")
+                # reassemble the description
+                description = first_seq + " -> " + second_seq + extra_info
+        feature.qualifiers["description"] = description
+    else:  # new-style FT line
+        value = line[21:].rstrip()
+        if value.startswith("/id="):
+            value = value[4:]
+            assert value.startswith('"')
+            assert value.endswith('"')
+            feature.id = value[1:-1]
+            return
+        elif value.startswith("/evidence="):
+            value = value[10:]
+            assert value.startswith('"')
+            if value.endswith('"'):
+                value = value[1:-1]
+            else:  # continues on the next line
+                value = value[1:]
+            assert "evidence" not in feature.qualifiers
+            feature.qualifiers["evidence"] = value
+            return
+        elif value.startswith("/note="):
+            value = value[6:]
+            assert value.startswith('"')
+            if value.endswith('"'):
+                value = value[1:-1]
+            else:  # continues on the next line
+                value = value[1:]
+            assert "note" not in feature.qualifiers
+            feature.qualifiers["note"] = value
+            return
+        # this line is a continuation of the description of the previous feature
+        keys = list(feature.qualifiers.keys())
+        key = keys[-1]
+        description = value.rstrip('"')
+        old_description = feature.qualifiers[key]
+        if key == "evidence" or old_description.endswith("-"):
+            description = "%s%s" % (old_description, description)
+        else:
+            description = "%s %s" % (old_description, description)
+        if feature.type == "VAR_SEQ":  # see VARSPLIC above
+            try:
+                first_seq, second_seq = description.split(" -> ")
+            except ValueError:
+                pass
+            else:
+                extra_info = ""
+                # we might have more information at the end of the
+                # second sequence, which should be in parenthesis
+                extra_info_pos = second_seq.find(" (")
+                if extra_info_pos != -1:
+                    extra_info = second_seq[extra_info_pos:]
+                    second_seq = second_seq[:extra_info_pos]
+                # now clean spaces out of the first and second string
+                first_seq = first_seq.replace(" ", "")
+                second_seq = second_seq.replace(" ", "")
+                # reassemble the description
+                description = first_seq + " -> " + second_seq + extra_info
+        feature.qualifiers[key] = description
+
+
+if __name__ == "__main__":
+    from Bio._utils import run_doctest
+
+    run_doctest(verbose=0)
diff --git a/code/lib/Bio/SwissProt/__pycache__/KeyWList.cpython-37.pyc b/code/lib/Bio/SwissProt/__pycache__/KeyWList.cpython-37.pyc
new file mode 100644
index 0000000..431122b
Binary files /dev/null and b/code/lib/Bio/SwissProt/__pycache__/KeyWList.cpython-37.pyc differ
diff --git a/code/lib/Bio/SwissProt/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/SwissProt/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000..24f622f
Binary files /dev/null and b/code/lib/Bio/SwissProt/__pycache__/__init__.cpython-37.pyc differ
diff --git a/code/lib/Bio/TogoWS/__init__.py b/code/lib/Bio/TogoWS/__init__.py
new file mode 100644
index 0000000..d7072d1
--- /dev/null
+++ b/code/lib/Bio/TogoWS/__init__.py
@@ -0,0 +1,376 @@
+# Copyright 2010-2011, 2013-2014, 2016-2018 by Peter Cock.  All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Provides code to access the TogoWS integrated websevices of DBCLS, Japan.
+
+This module aims to make the TogoWS (from DBCLS, Japan) easier to use. See:
+http://togows.dbcls.jp/
+
+The TogoWS REST service provides simple access to a range of databases, acting
+as a proxy to shield you from all the different provider APIs. This works using
+simple URLs (which this module will construct for you). For more details, see
+http://togows.dbcls.jp/site/en/rest.html
+
+The functionality is somewhat similar to Biopython's Bio.Entrez module, which
+provides access to the NCBI's Entrez Utilities (E-Utils) and likewise covers a
+wide range of databases.
+
+Currently TogoWS does not provide any usage guidelines (unlike the NCBI whose
+requirements are reasonably clear). To avoid risking overloading the service,
+Biopython will only allow three calls per second.
+
+The TogoWS SOAP service offers a more complex API for calling web services
+(essentially calling remote functions) provided by DDBJ, KEGG and PDBj. For
+example, this allows you to run a remote BLAST search at the DDBJ. This is
+not yet covered by this module, however there are lots of Python examples
+on the TogoWS website using the SOAPpy python library. See:
+http://togows.dbcls.jp/site/en/soap.html
+http://soapy.sourceforge.net/
+"""
+
+
+import io
+import time
+
+from urllib.request import urlopen
+from urllib.parse import quote
+
+
+# Constant
+_BASE_URL = "http://togows.dbcls.jp"
+
+# Caches:
+_search_db_names = None
+_entry_db_names = None
+_entry_db_fields = {}
+_entry_db_formats = {}
+_convert_formats = []
+
+
+def _get_fields(url):
+    """Query a TogoWS URL for a plain text list of values (PRIVATE)."""
+    handle = _open(url)
+    fields = handle.read().strip().split()
+    handle.close()
+    return fields
+
+
+def _get_entry_dbs():
+    return _get_fields(_BASE_URL + "/entry")
+
+
+def _get_entry_fields(db):
+    return _get_fields(_BASE_URL + "/entry/%s?fields" % db)
+
+
+def _get_entry_formats(db):
+    return _get_fields(_BASE_URL + "/entry/%s?formats" % db)
+
+
+def _get_convert_formats():
+    return [pair.split(".") for pair in _get_fields(_BASE_URL + "/convert/")]
+
+
+def entry(db, id, format=None, field=None):
+    """Call TogoWS 'entry' to fetch a record.
+
+    Arguments:
+     - db - database (string), see list below.
+     - id - identifier (string) or a list of identifiers (either as a list of
+       strings or a single string with comma separators).
+     - format - return data file format (string), options depend on the database
+       e.g. "xml", "json", "gff", "fasta", "ttl" (RDF Turtle)
+     - field - specific field from within the database record (string)
+       e.g. "au" or "authors" for pubmed.
+
+    At the time of writing, this includes the following::
+
+        KEGG: compound, drug, enzyme, genes, glycan, orthology, reaction,
+              module, pathway
+        DDBj: ddbj, dad, pdb
+        NCBI: nuccore, nucest, nucgss, nucleotide, protein, gene, omim,
+              homologue, snp, mesh, pubmed
+        EBI:  embl, uniprot, uniparc, uniref100, uniref90, uniref50
+
+    For the current list, please see http://togows.dbcls.jp/entry/
+
+    This function is essentially equivalent to the NCBI Entrez service
+    EFetch, available in Biopython as Bio.Entrez.efetch(...), but that
+    does not offer field extraction.
+    """
+    global _entry_db_names, _entry_db_fields, _entry_db_formats
+    if _entry_db_names is None:
+        _entry_db_names = _get_entry_dbs()
+    if db not in _entry_db_names:
+        raise ValueError(
+            "TogoWS entry fetch does not officially support database '%s'." % db
+        )
+    if field:
+        try:
+            fields = _entry_db_fields[db]
+        except KeyError:
+            fields = _get_entry_fields(db)
+            _entry_db_fields[db] = fields
+        if db == "pubmed" and field == "ti" and "title" in fields:
+            # Backwards compatibility fix for TogoWS change Nov/Dec 2013
+            field = "title"
+            import warnings
+
+            warnings.warn(
+                "TogoWS dropped 'pubmed' field alias 'ti', please use 'title' instead."
+            )
+        if field not in fields:
+            raise ValueError(
+                "TogoWS entry fetch does not explicitly support "
+                "field '%s' for database '%s'. Only: %s"
+                % (field, db, ", ".join(sorted(fields)))
+            )
+    if format:
+        try:
+            formats = _entry_db_formats[db]
+        except KeyError:
+            formats = _get_entry_formats(db)
+            _entry_db_formats[db] = formats
+        if format not in formats:
+            raise ValueError(
+                "TogoWS entry fetch does not explicitly support "
+                "format '%s' for database '%s'. Only: %s"
+                % (format, db, ", ".join(sorted(formats)))
+            )
+
+    if isinstance(id, list):
+        id = ",".join(id)
+    url = _BASE_URL + "/entry/%s/%s" % (db, quote(id))
+    if field:
+        url += "/" + field
+    if format:
+        url += "." + format
+    return _open(url)
+
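+# A minimal usage sketch (not part of the TogoWS API itself), assuming network
+# access to togows.dbcls.jp; the PubMed ID and the "title" field below are
+# illustrative placeholders.
+def _demo_entry():
+    """Fetch the title of one PubMed record via entry() (hedged sketch)."""
+    handle = entry("pubmed", "19304878", field="title")
+    title = handle.read().strip()
+    handle.close()
+    return title
+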
+
+def search_count(db, query):
+    """Call TogoWS search count to see how many matches a search gives.
+
+    Arguments:
+     - db - database (string), see http://togows.dbcls.jp/search
+     - query - search term (string)
+
+    You could then use the count to download a large set of search results in
+    batches using the offset and limit options to Bio.TogoWS.search(). In
+    general, however, the Bio.TogoWS.search_iter() function is simpler to use.
+    """
+    global _search_db_names
+    if _search_db_names is None:
+        _search_db_names = _get_fields(_BASE_URL + "/search")
+    if db not in _search_db_names:
+        # TODO - Make this a ValueError? Right now, despite what the HTML
+        # website claims, "gene" and "ncbi-gene" don't work and are not listed.
+        import warnings
+
+        warnings.warn(
+            "TogoWS search does not officially support database '%s'. "
+            "See %s/search/ for options." % (db, _BASE_URL)
+        )
+    url = _BASE_URL + "/search/%s/%s/count" % (db, quote(query))
+    handle = _open(url)
+    data = handle.read()
+    handle.close()
+    if not data:
+        raise ValueError("TogoWS returned no data from URL %s" % url)
+    try:
+        return int(data.strip())
+    except ValueError:
+        raise ValueError(
+            "Expected an integer from URL %s, got: %r" % (url, data)
+        ) from None
+
+
+def search_iter(db, query, limit=None, batch=100):
+    """Call TogoWS search iterating over the results (generator function).
+
+    Arguments:
+     - db - database (string), see http://togows.dbcls.jp/search
+     - query - search term (string)
+     - limit - optional upper bound on number of search results
+     - batch - number of search results to pull back each time we talk to
+       TogoWS (currently limited to 100).
+
+    You would use this function within a for loop, e.g.
+
+    >>> from Bio import TogoWS
+    >>> for id in TogoWS.search_iter("pubmed", "diabetes+human", limit=10):
+    ...     print("PubMed ID: %s" % id)  # maybe fetch data with entry?
+    PubMed ID: ...
+
+    Internally this first calls the Bio.TogoWS.search_count() and then
+    uses Bio.TogoWS.search() to get the results in batches.
+    """
+    count = search_count(db, query)
+    if not count:
+        return
+    # NOTE - We leave it to TogoWS to enforce any upper bound on each
+    # batch; they currently return an HTTP 400 Bad Request if above 100.
+    remain = count
+    if limit is not None:
+        remain = min(remain, limit)
+    offset = 1  # They don't use zero based counting
+    prev_ids = []  # Just cache the last batch for error checking
+    while remain:
+        batch = min(batch, remain)
+        # print("%r left, asking for %r" % (remain, batch))
+        ids = search(db, query, offset, batch).read().strip().split()
+        assert len(ids) == batch, "Got %i, expected %i" % (len(ids), batch)
+        # print("offset %i, %s ... %s" % (offset, ids[0], ids[-1]))
+        if ids == prev_ids:
+            raise RuntimeError("Same search results for previous offset")
+        for identifier in ids:
+            if identifier in prev_ids:
+                raise RuntimeError("Result %s was in previous batch" % identifier)
+            yield identifier
+        offset += batch
+        remain -= batch
+        prev_ids = ids
+
+
+def search(db, query, offset=None, limit=None, format=None):
+    """Call TogoWS search.
+
+    This is a low level wrapper for the TogoWS search function, which
+    can return results in a several formats. In general, the search_iter
+    function is more suitable for end users.
+
+    Arguments:
+     - db - database (string), see http://togows.dbcls.jp/search/
+     - query - search term (string)
+     - offset, limit - optional integers specifying which result to start from
+       (1 based) and the number of results to return.
+     - format - return data file format (string), e.g. "json", "ttl" (RDF)
+       By default plain text is returned, one result per line.
+
+    At the time of writing, TogoWS applies a default count limit of 100
+    search results, and this is an upper bound. To access more results,
+    use the offset argument or the search_iter(...) function.
+
+    TogoWS supports a long list of databases, including many from the NCBI
+    (e.g. "ncbi-pubmed" or "pubmed", "ncbi-genbank" or "genbank", and
+    "ncbi-taxonomy"), EBI (e.g. "ebi-ebml" or "embl", "ebi-uniprot" or
+    "uniprot, "ebi-go"), and KEGG (e.g. "kegg-compound" or "compound").
+    For the current list, see http://togows.dbcls.jp/search/
+
+    The NCBI provide the Entrez Search service (ESearch) which is similar,
+    available in Biopython as the Bio.Entrez.esearch() function.
+
+    See also the function Bio.TogoWS.search_count() which returns the number
+    of matches found, and the Bio.TogoWS.search_iter() function which allows
+    you to iterate over the search results (taking care of batching for you).
+    """
+    global _search_db_names
+    if _search_db_names is None:
+        _search_db_names = _get_fields(_BASE_URL + "/search")
+    if db not in _search_db_names:
+        # TODO - Make this a ValueError? Right now, despite what the HTML
+        # website claims, "gene" and "ncbi-gene" don't work and are not listed.
+        import warnings
+
+        warnings.warn(
+            "TogoWS search does not explicitly support database '%s'. "
+            "See %s/search/ for options." % (db, _BASE_URL)
+        )
+    url = _BASE_URL + "/search/%s/%s" % (db, quote(query))
+    if offset is not None and limit is not None:
+        try:
+            offset = int(offset)
+        except ValueError:
+            raise ValueError(
+                "Offset should be an integer (at least one), not %r" % offset
+            ) from None
+        try:
+            limit = int(limit)
+        except ValueError:
+            raise ValueError(
+                "Limit should be an integer (at least one), not %r" % limit
+            ) from None
+        if offset <= 0:
+            raise ValueError("Offset should be at least one, not %i" % offset)
+        if limit <= 0:
+            raise ValueError("Count should be at least one, not %i" % limit)
+        url += "/%i,%i" % (offset, limit)
+    elif offset is not None or limit is not None:
+        raise ValueError("Expect BOTH offset AND limit to be provided (or neither)")
+    if format:
+        url += "." + format
+    # print(url)
+    return _open(url)
+
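+# A hedged paging sketch, assuming network access: TogoWS caps each request
+# at 100 results, so offset/limit stepping (or search_iter) is needed to pull
+# more. The query string is a placeholder.
+def _demo_search_batches():
+    """Collect up to the first 300 search hits in batches of 100."""
+    collected = []
+    offset = 1  # TogoWS offsets are one-based
+    while len(collected) < 300:
+        handle = search("pubmed", "diabetes+human", offset=offset, limit=100)
+        ids = handle.read().strip().split()
+        handle.close()
+        if not ids:  # no more results
+            break
+        collected.extend(ids)
+        offset += len(ids)
+    return collected
+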
+
+def convert(data, in_format, out_format):
+    """Call TogoWS for file format conversion.
+
+    Arguments:
+     - data - string or handle containing input record(s)
+     - in_format - string describing the input file format (e.g. "genbank")
+     - out_format - string describing the requested output format (e.g. "fasta")
+
+    For a list of supported conversions (e.g. "genbank" to "fasta"), see
+    http://togows.dbcls.jp/convert/
+
+    Note that Biopython has built-in support for conversion of sequence and
+    alignment file formats (functions Bio.SeqIO.convert and Bio.AlignIO.convert).
+    """
+    global _convert_formats
+    if not _convert_formats:
+        _convert_formats = _get_convert_formats()
+    if [in_format, out_format] not in _convert_formats:
+        msg = "\n".join("%s -> %s" % tuple(pair) for pair in _convert_formats)
+        raise ValueError("Unsupported conversion. Choose from:\n%s" % msg)
+    url = _BASE_URL + "/convert/%s.%s" % (in_format, out_format)
+    # TODO - Should we just accept a string not a handle? What about a filename?
+    try:
+        # Handle
+        data = data.read()
+    except AttributeError:
+        # String
+        pass
+    return _open(url, post=data)
+
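+# A minimal sketch of convert(), assuming network access; the input file name
+# is a hypothetical placeholder. convert() accepts either a string or a handle.
+def _demo_convert(path="example.gbk"):
+    """Convert a GenBank record to FASTA via the TogoWS convert service."""
+    with open(path) as in_handle:
+        fasta_handle = convert(in_handle, "genbank", "fasta")
+    return fasta_handle.read()
+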
+
+def _open(url, post=None):
+    """Build the URL and open a handle to it (PRIVATE).
+
+    Open a handle to TogoWS, will raise an IOError if it encounters an error.
+
+    In the absence of clear guidelines, this function enforces a limit of
+    "up to three queries per second" to avoid abusing the TogoWS servers.
+    """
+    delay = 0.333333333  # one third of a second
+    current = time.time()
+    wait = _open.previous + delay - current
+    if wait > 0:
+        time.sleep(wait)
+        _open.previous = current + wait
+    else:
+        _open.previous = current
+
+    if post:
+        handle = urlopen(url, post.encode())
+    else:
+        handle = urlopen(url)
+
+    # We now trust TogoWS to have set an HTTP error code, that
+    # suffices for my current unit tests. Previously we would
+    # examine the start of the data returned back.
+    text_handle = io.TextIOWrapper(handle, encoding="UTF-8")
+    text_handle.url = handle.url
+    return text_handle
+
+
+_open.previous = 0
+
+
+if __name__ == "__main__":
+    from Bio._utils import run_doctest
+
+    run_doctest(verbose=0)
diff --git a/code/lib/Bio/TogoWS/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/TogoWS/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000..13fb541
Binary files /dev/null and b/code/lib/Bio/TogoWS/__pycache__/__init__.cpython-37.pyc differ
diff --git a/code/lib/Bio/UniGene/__init__.py b/code/lib/Bio/UniGene/__init__.py
new file mode 100644
index 0000000..6629c87
--- /dev/null
+++ b/code/lib/Bio/UniGene/__init__.py
@@ -0,0 +1,340 @@
+# Copyright 2006 by Sean Davis, National Cancer Institute, NIH.
+# All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Parse Unigene flat file format files such as the Hs.data file.
+
+Here is an overview of the flat file format that this parser deals with:
+
+   Line types/qualifiers::
+
+       ID           UniGene cluster ID
+       TITLE        Title for the cluster
+       GENE         Gene symbol
+       CYTOBAND     Cytological band
+       EXPRESS      Tissues of origin for ESTs in cluster
+       RESTR_EXPR   Single tissue or development stage contributes
+                    more than half the total EST frequency for this gene.
+       GNM_TERMINUS genomic confirmation of presence of a 3' terminus;
+                    T if a non-templated polyA tail is found among
+                    a cluster's sequences; else
+                    I if templated As are found in genomic sequence or
+                    S if a canonical polyA signal is found on
+                      the genomic sequence
+       GENE_ID      Entrez gene identifier associated with at least one
+                    sequence in this cluster;
+                    to be used instead of LocusLink.
+       LOCUSLINK    LocusLink identifier associated with at least one
+                    sequence in this cluster;
+                    deprecated in favor of GENE_ID
+       HOMOL        Homology;
+       CHROMOSOME   Chromosome.  For plants, CHROMOSOME refers to mapping
+                    on the arabidopsis genome.
+       STS          STS
+            ACC=         GenBank/EMBL/DDBJ accession number of STS
+                         [optional field]
+            UNISTS=      identifier in NCBI's UNISTS database
+       TXMAP        Transcript map interval
+            MARKER=      Marker found on at least one sequence in this
+                         cluster
+            RHPANEL=     Radiation Hybrid panel used to place marker
+       PROTSIM      Protein Similarity data for the sequence with
+                    highest-scoring protein similarity in this cluster
+            ORG=         Organism
+            PROTGI=      Sequence GI of protein
+            PROTID=      Sequence ID of protein
+            PCT=         Percent alignment
+            ALN=         length of aligned region (aa)
+       SCOUNT       Number of sequences in the cluster
+       SEQUENCE     Sequence
+            ACC=         GenBank/EMBL/DDBJ accession number of sequence
+            NID=         Unique nucleotide sequence identifier (gi)
+            PID=         Unique protein sequence identifier (used for
+                         non-ESTs)
+            CLONE=       Clone identifier (used for ESTs only)
+            END=         End (5'/3') of clone insert read (used for
+                         ESTs only)
+            LID=         Library ID; see Hs.lib.info for library name
+                         and tissue
+            MGC=         5' CDS-completeness indicator; if present, the
+                         clone associated with this sequence is believed
+                         CDS-complete. A value greater than 511 is the gi
+                         of the CDS-complete mRNA matched by the EST,
+                         otherwise the value is an indicator of the
+                         reliability of the test indicating CDS
+                         completeness; higher values indicate more
+                         reliable CDS-completeness predictions.
+            SEQTYPE=     Description of the nucleotide sequence.
+                         Possible values are mRNA, EST and HTC.
+            TRACE=       The Trace ID of the EST sequence, as provided by
+                         NCBI Trace Archive
+
+"""
+
+
+class SequenceLine:
+    """Store the information for one SEQUENCE line from a Unigene file.
+
+    Initialize with the text part of the SEQUENCE line, or nothing.
+
+    Attributes and descriptions (access as LOWER CASE):
+     - ACC=         GenBank/EMBL/DDBJ accession number of sequence
+     - NID=         Unique nucleotide sequence identifier (gi)
+     - PID=         Unique protein sequence identifier (used for non-ESTs)
+     - CLONE=       Clone identifier (used for ESTs only)
+     - END=         End (5'/3') of clone insert read (used for ESTs only)
+     - LID=         Library ID; see Hs.lib.info for library name and tissue
+     - MGC=         5' CDS-completeness indicator; if present,
+       the clone associated with this sequence
+       is believed CDS-complete. A value greater than 511
+       is the gi of the CDS-complete mRNA matched by the EST,
+       otherwise the value is an indicator of the reliability
+       of the test indicating CDS completeness;
+       higher values indicate more reliable CDS-completeness
+       predictions.
+     - SEQTYPE=     Description of the nucleotide sequence. Possible values
+       are mRNA, EST and HTC.
+     - TRACE=       The Trace ID of the EST sequence, as provided by NCBI
+       Trace Archive
+
+    """
+
+    def __init__(self, text=None):
+        """Initialize the class."""
+        self.acc = ""
+        self.nid = ""
+        self.lid = ""
+        self.pid = ""
+        self.clone = ""
+        self.image = ""
+        self.is_image = False
+        self.end = ""
+        self.mgc = ""
+        self.seqtype = ""
+        self.trace = ""
+        if text is not None:
+            self.text = text
+            self._init_from_text(text)
+
+    def _init_from_text(self, text):
+        parts = text.split("; ")
+        for part in parts:
+            key, val = part.split("=")
+            if key == "CLONE":
+                if val[:5] == "IMAGE":
+                    self.is_image = True
+                    self.image = val[6:]
+            setattr(self, key.lower(), val)
+
+    def __repr__(self):
+        """Return UniGene SequenceLine object as a string."""
+        return self.text
+
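+# A small illustrative sketch (the SEQUENCE text below is made up): each
+# KEY=value pair becomes a lower-case attribute, and IMAGE clones are flagged.
+def _demo_sequence_line():
+    """Parse one hypothetical SEQUENCE line and check its attributes."""
+    line = SequenceLine("ACC=AI123456.1; NID=g5146351; CLONE=IMAGE:12345; END=5'; LID=1944")
+    assert line.acc == "AI123456.1"
+    assert line.is_image and line.image == "12345"
+    return line
+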
+
+class ProtsimLine:
+    """Store the information for one PROTSIM line from a Unigene file.
+
+    Initialize with the text part of the PROTSIM line, or nothing.
+
+    Attributes and descriptions (access as LOWER CASE)
+    ORG=         Organism
+    PROTGI=      Sequence GI of protein
+    PROTID=      Sequence ID of protein
+    PCT=         Percent alignment
+    ALN=         length of aligned region (aa)
+    """
+
+    def __init__(self, text=None):
+        """Initialize the class."""
+        self.org = ""
+        self.protgi = ""
+        self.protid = ""
+        self.pct = ""
+        self.aln = ""
+        if text is not None:
+            self.text = text
+            self._init_from_text(text)
+
+    def _init_from_text(self, text):
+        parts = text.split("; ")
+
+        for part in parts:
+            key, val = part.split("=")
+            setattr(self, key.lower(), val)
+
+    def __repr__(self):
+        """Return UniGene ProtsimLine object as a string."""
+        return self.text
+
+
+class STSLine:
+    """Store the information for one STS line from a Unigene file.
+
+    Initialize with the text part of the STS line, or nothing.
+
+    Attributes and descriptions (access as LOWER CASE)
+
+    ACC=         GenBank/EMBL/DDBJ accession number of STS [optional field]
+    UNISTS=      identifier in NCBI's UNISTS database
+    """
+
+    def __init__(self, text=None):
+        """Initialize the class."""
+        self.acc = ""
+        self.unists = ""
+        if text is not None:
+            self.text = text
+            self._init_from_text(text)
+
+    def _init_from_text(self, text):
+        parts = text.split(" ")
+
+        for part in parts:
+            key, val = part.split("=")
+            setattr(self, key.lower(), val)
+
+    def __repr__(self):
+        """Return UniGene STSLine object as a string."""
+        return self.text
+
+
+class Record:
+    """Store a Unigene record.
+
+    Here is what is stored::
+
+        self.ID           = ''  # ID line
+        self.species      = ''  # Hs, Bt, etc.
+        self.title        = ''  # TITLE line
+        self.symbol       = ''  # GENE line
+        self.cytoband     = ''  # CYTOBAND line
+        self.express      = []  # EXPRESS line, parsed on ';'
+                                # Will be an array of strings
+        self.restr_expr   = ''  # RESTR_EXPR line
+        self.gnm_terminus = ''  # GNM_TERMINUS line
+        self.gene_id      = ''  # GENE_ID line
+        self.locuslink    = ''  # LOCUSLINK line
+        self.homol        = ''  # HOMOL line
+        self.chromosome   = ''  # CHROMOSOME line
+        self.protsim      = []  # PROTSIM entries, array of Protsims
+                                # Type ProtsimLine
+        self.sequence     = []  # SEQUENCE entries, array of Sequence entries
+                                # Type SequenceLine
+        self.sts          = []  # STS entries, array of STS entries
+                                # Type STSLine
+        self.txmap        = []  # TXMAP entries, array of TXMap entries
+
+    """
+
+    def __init__(self):
+        """Initialize the class."""
+        self.ID = ""  # ID line
+        self.species = ""  # Hs, Bt, etc.
+        self.title = ""  # TITLE line
+        self.symbol = ""  # GENE line
+        self.cytoband = ""  # CYTOBAND line
+        self.express = []  # EXPRESS line, parsed on ';'
+        self.restr_expr = ""  # RESTR_EXPR line
+        self.gnm_terminus = ""  # GNM_TERMINUS line
+        self.gene_id = ""  # GENE_ID line
+        self.locuslink = ""  # LOCUSLINK line
+        self.homol = ""  # HOMOL line
+        self.chromosome = ""  # CHROMOSOME line
+        self.protsim = []  # PROTSIM entries, array of Protsims
+        self.sequence = []  # SEQUENCE entries, array of Sequence entries
+        self.sts = []  # STS entries, array of STS entries
+        self.txmap = []  # TXMAP entries, array of TXMap entries
+
+    def __repr__(self):
+        """Represent the UniGene Record object as a string for debugging."""
+        return "<%s> %s %s %s" % (
+            self.__class__.__name__,
+            self.ID,
+            self.symbol,
+            self.title,
+        )
+
+
+def parse(handle):
+    """Read and load a UniGene records, for files containing multiple records."""
+    while True:
+        record = _read(handle)
+        if not record:
+            return
+        yield record
+
+
+def read(handle):
+    """Read and load a UniGene record, one record per file."""
+    record = _read(handle)
+    if not record:
+        raise ValueError("No SwissProt record found")
+    # We should have reached the end of the record by now
+    remainder = handle.read()
+    if remainder:
+        raise ValueError("More than one SwissProt record found")
+    return record
+
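+# A minimal usage sketch; "Hs.data" is a placeholder path to a UniGene
+# flat file such as the one described in the module docstring.
+def _demo_parse(path="Hs.data"):
+    """Print a one-line summary for each record in a UniGene flat file."""
+    with open(path) as handle:
+        for record in parse(handle):
+            print(record.ID, record.symbol, record.title)
+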
+
+# Everything below is private
+
+
+def _read(handle):
+    UG_INDENT = 12
+    record = None
+    for line in handle:
+        tag, value = line[:UG_INDENT].rstrip(), line[UG_INDENT:].rstrip()
+        line = line.rstrip()
+        if tag == "ID":
+            record = Record()
+            record.ID = value
+            record.species = record.ID.split(".")[0]
+        elif tag == "TITLE":
+            record.title = value
+        elif tag == "GENE":
+            record.symbol = value
+        elif tag == "GENE_ID":
+            record.gene_id = value
+        elif tag == "LOCUSLINK":
+            record.locuslink = value
+        elif tag == "HOMOL":
+            if value == "YES":
+                record.homol = True
+            elif value == "NO":
+                record.homol = True
+            else:
+                raise ValueError("Cannot parse HOMOL line %s" % line)
+        elif tag == "EXPRESS":
+            record.express = [word.strip() for word in value.split("|")]
+        elif tag == "RESTR_EXPR":
+            record.restr_expr = [word.strip() for word in value.split("|")]
+        elif tag == "CHROMOSOME":
+            record.chromosome = value
+        elif tag == "CYTOBAND":
+            record.cytoband = value
+        elif tag == "PROTSIM":
+            protsim = ProtsimLine(value)
+            record.protsim.append(protsim)
+        elif tag == "SCOUNT":
+            scount = int(value)
+        elif tag == "SEQUENCE":
+            sequence = SequenceLine(value)
+            record.sequence.append(sequence)
+        elif tag == "STS":
+            sts = STSLine(value)
+            record.sts.append(sts)
+        elif tag == "//":
+            if len(record.sequence) != scount:
+                raise ValueError(
+                    "The number of sequences specified in the record "
+                    "(%d) does not agree with the number of sequences found (%d)"
+                    % (scount, len(record.sequence))
+                )
+            return record
+        else:
+            raise ValueError("Unknown tag %s" % tag)
+    if record:
+        raise ValueError("Unexpected end of stream.")
diff --git a/code/lib/Bio/UniGene/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/UniGene/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000..8eb4548
Binary files /dev/null and b/code/lib/Bio/UniGene/__pycache__/__init__.cpython-37.pyc differ
diff --git a/code/lib/Bio/UniProt/GOA.py b/code/lib/Bio/UniProt/GOA.py
new file mode 100644
index 0000000..4beff4c
--- /dev/null
+++ b/code/lib/Bio/UniProt/GOA.py
@@ -0,0 +1,497 @@
+#!/usr/bin/env python
+# Copyright 2013, 2016 by Iddo Friedberg idoerg@gmail.com. All rights reserved.
+# Copyright 2020 by Sergio Valqui. All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Parsers for the GAF, GPA and GPI formats from UniProt-GOA.
+
+Uniprot-GOA README + GAF format description:
+ftp://ftp.ebi.ac.uk/pub/databases/GO/goa/UNIPROT/README
+
+Gene Association File, GAF formats:
+http://geneontology.org/docs/go-annotation-file-gaf-format-2.1/
+http://geneontology.org/docs/go-annotation-file-gaf-format-2.0/
+
+Gene Product Association Data  (GPA format) README:
+http://geneontology.org/docs/gene-product-association-data-gpad-format/
+
+Gene Product Information (GPI format) README:
+http://geneontology.org/docs/gene-product-information-gpi-format/
+
+Go Annotation files are located here:
+ftp://ftp.ebi.ac.uk/pub/databases/GO/goa/
+"""
+
+
+import copy
+
+# GAF: GO Annotation Format
+#
+# GAF version 2.0
+
+GAF20FIELDS = [
+    "DB",
+    "DB_Object_ID",
+    "DB_Object_Symbol",
+    "Qualifier",
+    "GO_ID",
+    "DB:Reference",
+    "Evidence",
+    "With",
+    "Aspect",
+    "DB_Object_Name",
+    "Synonym",
+    "DB_Object_Type",
+    "Taxon_ID",
+    "Date",
+    "Assigned_By",
+    "Annotation_Extension",
+    "Gene_Product_Form_ID",
+]
+
+# GAF version 1.0
+GAF10FIELDS = [
+    "DB",
+    "DB_Object_ID",
+    "DB_Object_Symbol",
+    "Qualifier",
+    "GO_ID",
+    "DB:Reference",
+    "Evidence",
+    "With",
+    "Aspect",
+    "DB_Object_Name",
+    "Synonym",
+    "DB_Object_Type",
+    "Taxon_ID",
+    "Date",
+    "Assigned_By",
+]
+
+# GPA version 1.0
+GPA10FIELDS = [
+    "DB",
+    "DB_Object_ID",
+    "Qualifier",
+    "GO_ID",
+    "DB:Reference",
+    "Evidence code",
+    "With",
+    "Interacting_taxon_ID",
+    "Date",
+    "Assigned_by",
+    "Annotation_Extension",
+    "Spliceform_ID",
+]
+
+# GPA version 1.1
+GPA11FIELDS = [
+    "DB",
+    "DB_Object_ID",
+    "Qualifier",
+    "GO_ID",
+    "DB:Reference",
+    "ECO_Evidence_code",
+    "With",
+    "Interacting_taxon_ID",
+    "Date",
+    "Assigned_by",
+    "Annotation Extension",
+    "Annotation_Properties",
+]
+
+# GPI version 1.0
+GPI10FIELDS = [
+    "DB",
+    "DB_subset",
+    "DB_Object_ID",
+    "DB_Object_Symbol",
+    "DB_Object_Name",
+    "DB_Object_Synonym",
+    "DB_Object_Type",
+    "Taxon",
+    "Annotation_Target_Set",
+    "Annotation_Completed",
+    "Parent_Object_ID",
+]
+
+# GPI version 1.1
+GPI11FIELDS = [
+    "DB_Object_ID",
+    "DB_Object_Symbol",
+    "DB_Object_Name",
+    "DB_Object_Synonym",
+    "DB_Object_Type",
+    "Taxon",
+    "Parent_Object_ID",
+    "DB_Xref",
+    "Gene_Product_Properties",
+]
+
+# GPI version 1.2
+GPI12FIELDS = [
+    "DB",
+    "DB_Object_ID",
+    "DB_Object_Symbol",
+    "DB_Object_Name",
+    "DB_Object_Synonym",
+    "DB_Object_Type",
+    "Taxon",
+    "Parent_Object_ID",
+    "DB_Xref",
+    "Gene_Product_Properties",
+]
+
+
+def _gpi10iterator(handle):
+    """Read GPI 1.0 format files (PRIVATE).
+
+    This iterator is used to read a gp_information.goa_uniprot
+    file which is in the GPI 1.0 format.
+    """
+    for inline in handle:
+        if inline[0] == "!":
+            continue
+        inrec = inline.rstrip("\n").split("\t")
+        if len(inrec) == 1:
+            continue
+        inrec[5] = inrec[5].split("|")  # DB_Object_Synonym(s)
+        inrec[8] = inrec[8].split("|")  # Annotation_Target_Set
+        yield dict(zip(GPI10FIELDS, inrec))
+
+
+def _gpi11iterator(handle):
+    """Read GPI 1.1 format files (PRIVATE).
+
+    This iterator is used to read a gp_information.goa_uniprot
+    file which is in the GPI 1.1 format.
+    """
+    for inline in handle:
+        if inline[0] == "!":
+            continue
+        inrec = inline.rstrip("\n").split("\t")
+        if len(inrec) == 1:
+            continue
+        inrec[2] = inrec[2].split("|")  # DB_Object_Name
+        inrec[3] = inrec[3].split("|")  # DB_Object_Synonym(s)
+        inrec[7] = inrec[7].split("|")  # DB_Xref(s)
+        inrec[8] = inrec[8].split("|")  # Properties
+        yield dict(zip(GPI11FIELDS, inrec))
+
+
+def _gpi12iterator(handle):
+    """Read GPI 1.2 format files (PRIVATE).
+
+    This iterator is used to read a gp_information.goa_uniprot
+    file which is in the GPI 1.2 format.
+    """
+    for inline in handle:
+        if inline[0] == "!":
+            continue
+        inrec = inline.rstrip("\n").split("\t")
+        if len(inrec) == 1:
+            continue
+        inrec[3] = inrec[3].split("|")  # DB_Object_Name
+        inrec[4] = inrec[4].split("|")  # DB_Object_Synonym(s)
+        inrec[8] = inrec[8].split("|")  # DB_Xref(s)
+        inrec[9] = inrec[9].split("|")  # Properties
+        yield dict(zip(GPI12FIELDS, inrec))
+
+
+def gpi_iterator(handle):
+    """Read GPI format files.
+
+    This function should be called to read a
+    gp_information.goa_uniprot file. It reads the version line and
+    dispatches to the matching GPI 1.x iterator (GPI 2.x is not yet
+    supported).
+    """
+    inline = handle.readline()
+    if inline.strip() == "!gpi-version: 1.2":
+        return _gpi12iterator(handle)
+    elif inline.strip() == "!gpi-version: 1.1":
+        # sys.stderr.write("gpi 1.1\n")
+        return _gpi11iterator(handle)
+    elif inline.strip() == "!gpi-version: 1.0":
+        # sys.stderr.write("gpi 1.0\n")
+        return _gpi10iterator(handle)
+    elif inline.strip() == "!gpi-version: 2.1":
+        # sys.stderr.write("gpi 2.1\n")
+        # return _gpi20iterator(handle)
+        raise NotImplementedError("Sorry, parsing GPI version 2 not implemented yet.")
+    else:
+        raise ValueError(f"Unknown GPI version {inline}\n")
+
+
+def _gpa10iterator(handle):
+    """Read GPA 1.0 format files (PRIVATE).
+
+    This iterator is used to read a gp_association.*
+    file which is in the GPA 1.0 format. Do not call directly. Rather,
+    use the gpa_iterator function.
+    """
+    for inline in handle:
+        if inline[0] == "!":
+            continue
+        inrec = inline.rstrip("\n").split("\t")
+        if len(inrec) == 1:
+            continue
+        inrec[2] = inrec[2].split("|")  # Qualifier
+        inrec[4] = inrec[4].split("|")  # DB:Reference(s)
+        inrec[6] = inrec[6].split("|")  # With
+        inrec[10] = inrec[10].split("|")  # Annotation extension
+        yield dict(zip(GPA10FIELDS, inrec))
+
+
+def _gpa11iterator(handle):
+    """Read GPA 1.1 format files (PRIVATE).
+
+    This iterator is used to read a gp_association.goa_uniprot
+    file which is in the GPA 1.1 format. Do not call directly. Rather,
+    use the gpa_iterator function.
+    """
+    for inline in handle:
+        if inline[0] == "!":
+            continue
+        inrec = inline.rstrip("\n").split("\t")
+        if len(inrec) == 1:
+            continue
+        inrec[2] = inrec[2].split("|")  # Qualifier
+        inrec[4] = inrec[4].split("|")  # DB:Reference(s)
+        inrec[6] = inrec[6].split("|")  # With
+        inrec[10] = inrec[10].split("|")  # Annotation extension
+        yield dict(zip(GPA11FIELDS, inrec))
+
+
+def gpa_iterator(handle):
+    """Read GPA format files.
+
+    This function should be called to read a
+    gp_association.goa_uniprot file. It reads the version line and
+    returns a GPA 1.1 or GPA 1.0 iterator as needed.
+    """
+    inline = handle.readline()
+    if inline.strip() == "!gpa-version: 1.1":
+        # sys.stderr.write("gpa 1.1\n")
+        return _gpa11iterator(handle)
+    elif inline.strip() == "!gpa-version: 1.0":
+        # sys.stderr.write("gpa 1.0\n")
+        return _gpa10iterator(handle)
+    else:
+        raise ValueError(f"Unknown GPA version {inline}\n")
+
+
+def _gaf20iterator(handle):
+    for inline in handle:
+        if inline[0] == "!":
+            continue
+        inrec = inline.rstrip("\n").split("\t")
+        if len(inrec) == 1:
+            continue
+        inrec[3] = inrec[3].split("|")  # Qualifier
+        inrec[5] = inrec[5].split("|")  # DB:reference(s)
+        inrec[7] = inrec[7].split("|")  # With || From
+        inrec[10] = inrec[10].split("|")  # Synonym
+        inrec[12] = inrec[12].split("|")  # Taxon
+        yield dict(zip(GAF20FIELDS, inrec))
+
+
+def _gaf10iterator(handle):
+    for inline in handle:
+        if inline[0] == "!":
+            continue
+        inrec = inline.rstrip("\n").split("\t")
+        if len(inrec) == 1:
+            continue
+        inrec[3] = inrec[3].split("|")  # Qualifier
+        inrec[5] = inrec[5].split("|")  # DB:reference(s)
+        inrec[7] = inrec[7].split("|")  # With || From
+        inrec[10] = inrec[10].split("|")  # Synonym
+        inrec[12] = inrec[12].split("|")  # Taxon
+        yield dict(zip(GAF10FIELDS, inrec))
+
+
+def _gaf10byproteiniterator(handle):
+    cur_id = None
+    id_rec_list = []
+    for inline in handle:
+        if inline[0] == "!":
+            continue
+        inrec = inline.rstrip("\n").split("\t")
+        if len(inrec) == 1:
+            continue
+        inrec[3] = inrec[3].split("|")  # Qualifier
+        inrec[5] = inrec[5].split("|")  # DB:reference(s)
+        inrec[7] = inrec[7].split("|")  # With || From
+        inrec[10] = inrec[10].split("|")  # Synonym
+        inrec[12] = inrec[12].split("|")  # Taxon
+        cur_rec = dict(zip(GAF10FIELDS, inrec))
+        if cur_rec["DB_Object_ID"] != cur_id and cur_id:
+            ret_list = copy.copy(id_rec_list)
+            id_rec_list = [cur_rec]
+            cur_id = cur_rec["DB_Object_ID"]
+            yield ret_list
+        else:
+            cur_id = cur_rec["DB_Object_ID"]
+            id_rec_list.append(cur_rec)
+    # Yield the final group of records once the file is exhausted
+    if id_rec_list:
+        yield id_rec_list
+
+
+def _gaf20byproteiniterator(handle):
+    cur_id = None
+    id_rec_list = []
+    for inline in handle:
+        if inline[0] == "!":
+            continue
+        inrec = inline.rstrip("\n").split("\t")
+        if len(inrec) == 1:
+            continue
+        inrec[3] = inrec[3].split("|")  # Qualifier
+        inrec[5] = inrec[5].split("|")  # DB:reference(s)
+        inrec[7] = inrec[7].split("|")  # With || From
+        inrec[10] = inrec[10].split("|")  # Synonym
+        inrec[12] = inrec[12].split("|")  # Taxon
+        cur_rec = dict(zip(GAF20FIELDS, inrec))
+        if cur_rec["DB_Object_ID"] != cur_id and cur_id:
+            ret_list = copy.copy(id_rec_list)
+            id_rec_list = [cur_rec]
+            cur_id = cur_rec["DB_Object_ID"]
+            yield ret_list
+        else:
+            cur_id = cur_rec["DB_Object_ID"]
+            id_rec_list.append(cur_rec)
+    # Yield the final group of records once the file is exhausted
+    if id_rec_list:
+        yield id_rec_list
+
+
+def gafbyproteiniterator(handle):
+    """Iterate over records in a gene association file.
+
+    Yields lists of all consecutive records sharing the same DB_Object_ID.
+    This function should be called to read a
+    gene_association.goa_uniprot file. It reads the version line and
+    returns a GAF 2.0 or GAF 1.0 iterator as needed; GAF 2.1 files are
+    currently handled by the GAF 2.0 iterator.
+    """
+    inline = handle.readline()
+    if inline.strip() == "!gaf-version: 2.0":
+        # sys.stderr.write("gaf 2.0\n")
+        return _gaf20byproteiniterator(handle)
+    elif inline.strip() == "!gaf-version: 1.0":
+        # sys.stderr.write("gaf 1.0\n")
+        return _gaf10byproteiniterator(handle)
+    elif inline.strip() == "!gaf-version: 2.1":
+        # Handle GAF 2.1 as GAF 2.0 for now TODO: fix
+        # sys.stderr.write("gaf 2.1\n")
+        return _gaf20byproteiniterator(handle)
+    else:
+        raise ValueError(f"Unknown GAF version {inline}\n")
+
+
+def gafiterator(handle):
+    """Iterate over a GAF 1.0 or 2.0 file.
+
+    This function should be called to read a
+    gene_association.goa_uniprot file. Reads the first record and
+    returns a gaf 2.0 or a gaf 1.0 iterator as needed
+
+    Example: open, read, iterate and filter results.
+
+    The original data file has been trimmed to ~600 rows.
+
+    Original source ftp://ftp.ebi.ac.uk/pub/databases/GO/goa/YEAST/goa_yeast.gaf.gz
+
+    >>> from Bio.UniProt.GOA import gafiterator, record_has
+    >>> Evidence = {'Evidence': set(['ND'])}
+    >>> Synonym = {'Synonym': set(['YA19A_YEAST', 'YAL019W-A'])}
+    >>> Taxon_ID = {'Taxon_ID': set(['taxon:559292'])}
+    >>> with open('UniProt/goa_yeast.gaf', 'r') as handle:
+    ...     for rec in gafiterator(handle):
+    ...         if record_has(rec, Taxon_ID) and record_has(rec, Evidence) and record_has(rec, Synonym):
+    ...             for key in ('DB_Object_Name', 'Evidence', 'Synonym', 'Taxon_ID'):
+    ...                 print(rec[key])
+    ...
+    Putative uncharacterized protein YAL019W-A
+    ND
+    ['YA19A_YEAST', 'YAL019W-A']
+    ['taxon:559292']
+    Putative uncharacterized protein YAL019W-A
+    ND
+    ['YA19A_YEAST', 'YAL019W-A']
+    ['taxon:559292']
+    Putative uncharacterized protein YAL019W-A
+    ND
+    ['YA19A_YEAST', 'YAL019W-A']
+    ['taxon:559292']
+
+    """
+    inline = handle.readline()
+    if inline.strip() == "!gaf-version: 2.0":
+        # sys.stderr.write("gaf 2.0\n")
+        return _gaf20iterator(handle)
+    elif inline.strip() == "!gaf-version: 2.1":
+        # sys.stderr.write("gaf 2.1\n")
+        # Handle GAF 2.1 as GAF 2.0 for now. TODO: fix
+        return _gaf20iterator(handle)
+    elif inline.strip() == "!gaf-version: 1.0":
+        # sys.stderr.write("gaf 1.0\n")
+        return _gaf10iterator(handle)
+    else:
+        raise ValueError(f"Unknown GAF version {inline}\n")
+
+
+def writerec(outrec, handle, fields=GAF20FIELDS):
+    """Write a single UniProt-GOA record to an output stream.
+
+    The caller should know the format version and pass the matching
+    field list (default: GAF 2.0). No version header line is written.
+    """
+    outstr = ""
+    for field in fields[:-1]:
+        if isinstance(outrec[field], list):
+            for subfield in outrec[field]:
+                outstr += subfield + "|"
+            outstr = outstr[:-1] + "\t"
+        else:
+            outstr += outrec[field] + "\t"
+    outstr += outrec[fields[-1]] + "\n"
+    handle.write(outstr)
+
+
+def writebyproteinrec(outprotrec, handle, fields=GAF20FIELDS):
+    """Write a list of GAF records to an output stream.
+
+    The caller should know the format version and pass the matching
+    field list (default: GAF 2.0). Typically the list is one yielded by
+    gafbyproteiniterator, which contains all consecutive records with
+    the same DB_Object_ID.
+    """
+    for outrec in outprotrec:
+        writerec(outrec, handle, fields=fields)
+
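+# A hedged round-trip sketch; both paths are placeholders. Note gafiterator
+# consumes the "!gaf-version" header line, so the copy lacks that header.
+def _demo_roundtrip(in_path, out_path):
+    """Copy a GAF 2.0 file record by record using gafiterator and writerec."""
+    with open(in_path) as in_handle, open(out_path, "w") as out_handle:
+        for rec in gafiterator(in_handle):
+            writerec(rec, out_handle, fields=GAF20FIELDS)
+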
+
+def record_has(inrec, fieldvals):
+    """Accept a record, and a dictionary of field values.
+
+    The format is {'field_name': set([val1, val2])}.
+    If any field in the record has a matching value, the function returns
+    True. Otherwise, it returns False.
+    """
+    retval = False
+    for field in fieldvals:
+        if isinstance(inrec[field], str):
+            set1 = {inrec[field]}
+        else:
+            set1 = set(inrec[field])
+        if set1 & fieldvals[field]:
+            retval = True
+            break
+    return retval
+
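+# A small filtering sketch; the file name is a placeholder. record_has()
+# treats each field as a set, so one shared value is enough to match.
+def _demo_filter(path="goa_yeast.gaf"):
+    """Return only the GAF records annotated with the ND evidence code."""
+    wanted = {"Evidence": {"ND"}}
+    with open(path) as handle:
+        return [rec for rec in gafiterator(handle) if record_has(rec, wanted)]
+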
+
+if __name__ == "__main__":
+    from Bio._utils import run_doctest
+
+    run_doctest(verbose=0)
diff --git a/code/lib/Bio/UniProt/__init__.py b/code/lib/Bio/UniProt/__init__.py
new file mode 100644
index 0000000..56f36f9
--- /dev/null
+++ b/code/lib/Bio/UniProt/__init__.py
@@ -0,0 +1,17 @@
+# Copyright 2013 by Iddo Friedberg idoerg@gmail.com
+# Revision copyright 2013 by Peter Cock.  All rights reserved.
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Code for dealing with assorted UniProt file formats.
+
+This currently include parsers for the GAF, GPA and GPI formats
+from UniProt-GOA as the module Bio.UniProt.GOA.
+
+See also Bio.SwissProt and the "swiss" support in Bio.SeqIO for
+the legacy plain text sequence format still used in UniProt.
+
+See also Bio.SeqIO.SwissIO for the "uniprot-xml" support in
+Bio.SeqIO.
+"""
diff --git a/code/lib/Bio/UniProt/__pycache__/GOA.cpython-37.pyc b/code/lib/Bio/UniProt/__pycache__/GOA.cpython-37.pyc
new file mode 100644
index 0000000..56dadb3
Binary files /dev/null and b/code/lib/Bio/UniProt/__pycache__/GOA.cpython-37.pyc differ
diff --git a/code/lib/Bio/UniProt/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/UniProt/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000..b96cb5a
Binary files /dev/null and b/code/lib/Bio/UniProt/__pycache__/__init__.cpython-37.pyc differ
diff --git a/code/lib/Bio/Wise/__init__.py b/code/lib/Bio/Wise/__init__.py
new file mode 100644
index 0000000..4841b08
--- /dev/null
+++ b/code/lib/Bio/Wise/__init__.py
@@ -0,0 +1,151 @@
+#!/usr/bin/env python
+# Copyright 2004-2005 by Michael Hoffman. All rights reserved.
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Run and process output from the Wise2 package tools.
+
+Bio.Wise contains modules for running and processing the output of
+some of the models in the Wise2 package by Ewan Birney available from:
+ftp://ftp.ebi.ac.uk/pub/software/unix/wise2/
+http://www.ebi.ac.uk/Wise2/
+
+Bio.Wise.psw is for protein Smith-Waterman alignments
+Bio.Wise.dnal is for Smith-Waterman DNA alignments
+"""
+
+
+import os
+import sys
+import tempfile
+
+from Bio import SeqIO
+
+
+def _build_align_cmdline(
+    cmdline, pair, output_filename, kbyte=None, force_type=None, quiet=False
+):
+    """Build a command line string (PRIVATE).
+
+    >>> os.environ["WISE_KBYTE"]="300000"
+    >>> if os.isatty(sys.stderr.fileno()):
+    ...    c = _build_align_cmdline(["dnal"], ("seq1.fna", "seq2.fna"),
+    ...                             "/tmp/output", kbyte=100000)
+    ...    assert c == 'dnal -kbyte 100000 seq1.fna seq2.fna > /tmp/output', c
+    ...    c = _build_align_cmdline(["psw"], ("seq1.faa", "seq2.faa"),
+    ...                             "/tmp/output_aa")
+    ...    assert c == 'psw -kbyte 300000 seq1.faa seq2.faa > /tmp/output_aa', c
+    ... else:
+    ...    c = _build_align_cmdline(["dnal"], ("seq1.fna", "seq2.fna"),
+    ...                             "/tmp/output", kbyte=100000)
+    ...    assert c == 'dnal -kbyte 100000 -quiet seq1.fna seq2.fna > /tmp/output', c
+    ...    c = _build_align_cmdline(["psw"], ("seq1.faa", "seq2.faa"),
+    ...                             "/tmp/output_aa")
+    ...    assert c == 'psw -kbyte 300000 -quiet seq1.faa seq2.faa > /tmp/output_aa', c
+
+    """
+    cmdline = cmdline[:]
+
+    # XXX: force_type ignored
+
+    if kbyte is None:
+        try:
+            cmdline.extend(("-kbyte", os.environ["WISE_KBYTE"]))
+        except KeyError:
+            pass
+    else:
+        cmdline.extend(("-kbyte", str(kbyte)))
+
+    if not os.isatty(sys.stderr.fileno()):
+        cmdline.append("-quiet")
+
+    cmdline.extend(pair)
+    cmdline.extend((">", output_filename))
+    if quiet:
+        cmdline.extend(("2>", "/dev/null"))
+    return " ".join(cmdline)
+
+
+def align(
+    cmdline, pair, kbyte=None, force_type=None, dry_run=False, quiet=False, debug=False
+):
+    """Run an alignment. Returns a filehandle."""
+    if not pair or len(pair) != 2:
+        raise ValueError("Expected pair of filename, not %r" % pair)
+
+    output_file = tempfile.NamedTemporaryFile(mode="r")
+    input_files = (
+        tempfile.NamedTemporaryFile(mode="w"),
+        tempfile.NamedTemporaryFile(mode="w"),
+    )
+
+    if dry_run:
+        print(
+            _build_align_cmdline(
+                cmdline, pair, output_file.name, kbyte, force_type, quiet
+            )
+        )
+        return
+
+    for filename, input_file in zip(pair, input_files):
+        # Pipe the file through Biopython's Fasta parser/writer
+        # to make sure it conforms to the Fasta standard (in particular,
+        # Wise2 may choke on long lines in the Fasta file)
+        records = SeqIO.parse(open(filename), "fasta")
+        SeqIO.write(records, input_file, "fasta")
+        input_file.flush()
+
+    input_file_names = [input_file.name for input_file in input_files]
+
+    cmdline_str = _build_align_cmdline(
+        cmdline, input_file_names, output_file.name, kbyte, force_type, quiet
+    )
+
+    if debug:
+        sys.stderr.write("%s\n" % cmdline_str)
+
+    status = os.system(cmdline_str) >> 8
+
+    # After the shift, `status` is the process exit code; codes above 1
+    # are treated as errors
+    if status > 1:
+        if kbyte != 0:  # possible memory problem; could be None
+            sys.stderr.write("INFO trying again with the linear model\n")
+            return align(cmdline, pair, 0, force_type, dry_run, quiet, debug)
+        else:
+            raise OSError("%s returned %s" % (" ".join(cmdline), status))
+
+    return output_file
+
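+# A minimal sketch of the dry_run option; the FASTA file names are
+# placeholders. With dry_run=True the command line is printed, not executed.
+def _demo_dry_run():
+    """Show the dnal command line that align() would run."""
+    align(["dnal"], ("seq1.fna", "seq2.fna"), kbyte=100000, dry_run=True)
+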
+
+def all_pairs(singles):
+    """Generate pairs list for all-against-all alignments.
+
+    >>> all_pairs(range(4))
+    [(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)]
+    """
+    pairs = []
+
+    singles = list(singles)
+    while singles:
+        suitor = singles.pop(0)  # if sorted, stay sorted
+        pairs.extend((suitor, single) for single in singles)
+
+    return pairs
+
+
+def main():
+    """Provision for command line testing."""
+    pass
+
+
+def _test(*args, **keywds):
+    import doctest
+
+    doctest.testmod(sys.modules[__name__], *args, **keywds)
+
+
+if __name__ == "__main__":
+    if __debug__:
+        _test()
+    main()
diff --git a/code/lib/Bio/Wise/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/Wise/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000..c3b8976
Binary files /dev/null and b/code/lib/Bio/Wise/__pycache__/__init__.cpython-37.pyc differ
diff --git a/code/lib/Bio/Wise/__pycache__/dnal.cpython-37.pyc b/code/lib/Bio/Wise/__pycache__/dnal.cpython-37.pyc
new file mode 100644
index 0000000..672c992
Binary files /dev/null and b/code/lib/Bio/Wise/__pycache__/dnal.cpython-37.pyc differ
diff --git a/code/lib/Bio/Wise/__pycache__/psw.cpython-37.pyc b/code/lib/Bio/Wise/__pycache__/psw.cpython-37.pyc
new file mode 100644
index 0000000..c4dca6f
Binary files /dev/null and b/code/lib/Bio/Wise/__pycache__/psw.cpython-37.pyc differ
diff --git a/code/lib/Bio/Wise/dnal.py b/code/lib/Bio/Wise/dnal.py
new file mode 100644
index 0000000..745f4f5
--- /dev/null
+++ b/code/lib/Bio/Wise/dnal.py
@@ -0,0 +1,175 @@
+#!/usr/bin/env python
+# Copyright 2004-2005 by Michael Hoffman. All rights reserved.
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Run and process output from the Wise2 package tool dnal.
+
+Bio.Wise contains modules for running and processing the output of
+some of the models in the Wise2 package by Ewan Birney available from:
+ftp://ftp.ebi.ac.uk/pub/software/unix/wise2/
+http://www.ebi.ac.uk/Wise2/
+
+Bio.Wise.psw is for protein Smith-Waterman alignments
+Bio.Wise.dnal is for Smith-Waterman DNA alignments
+"""
+
+
+import re
+
+# Importing with leading underscore as not intended to be exposed
+from subprocess import getoutput as _getoutput
+
+from Bio import Wise
+
+
+_SCORE_MATCH = 4
+_SCORE_MISMATCH = -1
+_SCORE_GAP_START = -5
+_SCORE_GAP_EXTENSION = -1
+
+_CMDLINE_DNAL = ["dnal", "-alb", "-nopretty"]
+
+
+def _build_dnal_cmdline(match, mismatch, gap, extension):
+    res = _CMDLINE_DNAL[:]
+    res.extend(["-match", str(match)])
+    res.extend(["-mis", str(mismatch)])
+    res.extend(["-gap", str(-gap)])  # negative: convert score to penalty
+    res.extend(["-ext", str(-extension)])  # negative: convert score to penalty
+
+    return res
+
+
+_CMDLINE_FGREP_COUNT = "fgrep -c '%s' %s"
+
+
+def _fgrep_count(pattern, file):
+    return int(_getoutput(_CMDLINE_FGREP_COUNT % (pattern, file)))
+
+
+_re_alb_line2coords = re.compile(r"^\[([^:]+):[^\[]+\[([^:]+):")
+
+
+def _alb_line2coords(line):
+    return tuple(
+        int(coord) + 1  # zero-based -> one-based
+        for coord in _re_alb_line2coords.match(line).groups()
+    )
+
+
+def _get_coords(filename):
+    alb = open(filename)
+
+    start_line = None
+    end_line = None
+
+    for line in alb:
+        if line.startswith("["):
+            if not start_line:
+                start_line = line  # rstrip not needed
+            else:
+                end_line = line
+
+    if end_line is None:  # sequence is too short
+        return [(0, 0), (0, 0)]
+
+    return list(
+        zip(*map(_alb_line2coords, [start_line, end_line]))
+    )  # returns [(start0, end0), (start1, end1)]
+
+
+class Statistics:
+    """Calculate statistics from an ALB report."""
+
+    def __init__(self, filename, match, mismatch, gap, extension):
+        """Initialize the class."""
+        self.matches = _fgrep_count('"SEQUENCE" %s' % match, filename)
+        self.mismatches = _fgrep_count('"SEQUENCE" %s' % mismatch, filename)
+        self.gaps = _fgrep_count('"INSERT" %s' % gap, filename)
+
+        if gap == extension:
+            self.extensions = 0
+        else:
+            self.extensions = _fgrep_count('"INSERT" %s' % extension, filename)
+
+        self.score = (
+            match * self.matches
+            + mismatch * self.mismatches
+            + gap * self.gaps
+            + extension * self.extensions
+        )
+
+        if self.matches or self.mismatches or self.gaps or self.extensions:
+            self.coords = _get_coords(filename)
+        else:
+            self.coords = [(0, 0), (0, 0)]
+
+    def identity_fraction(self):
+        """Calculate the fraction of matches."""
+        return self.matches / (self.matches + self.mismatches)
+
+    header = "identity_fraction\tmatches\tmismatches\tgaps\textensions"
+
+    def __str__(self):
+        """Statistics as a tab separated string."""
+        return "\t".join(
+            str(x)
+            for x in (
+                self.identity_fraction(),
+                self.matches,
+                self.mismatches,
+                self.gaps,
+                self.extensions,
+            )
+        )
+
+
+def align(
+    pair,
+    match=_SCORE_MATCH,
+    mismatch=_SCORE_MISMATCH,
+    gap=_SCORE_GAP_START,
+    extension=_SCORE_GAP_EXTENSION,
+    **keywds
+):
+    """Align a pair of DNA files using dnal and calculate the statistics of the alignment."""
+    cmdline = _build_dnal_cmdline(match, mismatch, gap, extension)
+    temp_file = Wise.align(cmdline, pair, **keywds)
+    try:
+        return Statistics(temp_file.name, match, mismatch, gap, extension)
+    except AttributeError:
+        # Wise.align() returns None for a dry run, so there is no file to parse
+        if keywds.get("dry_run"):
+            return None
+        raise
+
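+# A hedged end-to-end sketch; requires the Wise2 "dnal" binary plus fgrep on
+# the PATH, and the FASTA file names are placeholders.
+def _demo_dnal():
+    """Align two DNA FASTA files and print the alignment statistics."""
+    stats = align(("seq1.fna", "seq2.fna"))
+    print(Statistics.header)
+    print(stats)
+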
+
+def main():
+    """Command line implementation."""
+    import sys
+
+    stats = align(sys.argv[1:3])
+    print(
+        "\n".join(
+            "%s: %s" % (attr, getattr(stats, attr))
+            for attr in ("matches", "mismatches", "gaps", "extensions")
+        )
+    )
+    print("identity_fraction: %s" % stats.identity_fraction())
+    print("coords: %s" % stats.coords)
+
+
+def _test(*args, **keywds):
+    import doctest
+    import sys
+
+    doctest.testmod(sys.modules[__name__], *args, **keywds)
+
+
+if __name__ == "__main__":
+    if __debug__:
+        _test()
+    main()
diff --git a/code/lib/Bio/Wise/psw.py b/code/lib/Bio/Wise/psw.py
new file mode 100644
index 0000000..2eea52b
--- /dev/null
+++ b/code/lib/Bio/Wise/psw.py
@@ -0,0 +1,172 @@
+#!/usr/bin/env python
+# Copyright 2004 by Michael Hoffman. All rights reserved.
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Run and process output from the Wise2 package tool psw.
+
+Bio.Wise contains modules for running and processing the output of
+some of the models in the Wise2 package by Ewan Birney available from:
+ftp://ftp.ebi.ac.uk/pub/software/unix/wise2/
+http://www.ebi.ac.uk/Wise2/
+
+Bio.Wise.psw is for protein Smith-Waterman alignments
+Bio.Wise.dnal is for Smith-Waterman DNA alignments
+"""
+
+
+import os
+import re
+import sys
+
+from Bio import Wise
+
+
+_CMDLINE_PSW = ["psw", "-l", "-F"]
+_OPTION_GAP_START = "-g"
+_OPTION_GAP_EXTENSION = "-e"
+_OPTION_SCORES = "-m"
+
+
+class AlignmentColumnFullException(Exception):
+    """Manage exception in the alignment output."""
+
+    pass
+
+
+class Alignment(list):
+    """Define a container for all alignment Columns, output from running psw."""
+
+    def append(self, column_unit):
+        """Add an alignment Column to Alignment."""
+        try:
+            self[-1].append(column_unit)
+        except AlignmentColumnFullException:
+            list.append(self, AlignmentColumn(column_unit))
+        except IndexError:
+            list.append(self, AlignmentColumn(column_unit))
+
+
+class AlignmentColumn(list):
+    """Define a container for the units that made the Column."""
+
+    def _set_kind(self, column_unit):
+        if self.kind == "SEQUENCE":
+            self.kind = column_unit.kind
+
+    def __init__(self, column_unit):
+        """Initialize the class."""
+        assert column_unit.unit == 0
+        self.kind = column_unit.kind
+        list.__init__(self, [column_unit.column, None])
+
+    def __repr__(self):
+        """Represent the AlignmentColumn object as a string for debugging."""
+        return "%s(%r, %r)" % (self.kind, self[0], self[1])
+
+    def append(self, column_unit):
+        """Add a unit to the Column."""
+        if self[1] is not None:
+            raise AlignmentColumnFullException
+
+        assert column_unit.unit == 1
+
+        self._set_kind(column_unit)
+        self[1] = column_unit.column
+
+
+class ColumnUnit:
+    """Define a container for the details of each sequence alignment."""
+
+    def __init__(self, unit, column, kind):
+        """Initialize the class."""
+        self.unit = unit
+        self.column = column
+        self.kind = kind
+
+    def __repr__(self):
+        """Represent the ColumnUnit object as a string for debugging."""
+        return "ColumnUnit(unit=%r, column=%r, kind=%r)" % (
+            self.unit,
+            self.column,
+            self.kind,
+        )
+
+
+_re_unit = re.compile(r"^Unit +([01])- \[ *(-?\d+)- *(-?\d+)\] \[(\w+)\]$")
+
+
+def parse_line(line):
+    """Parse a line from psw.
+
+    >>> print(parse_line("Column 0:"))
+    None
+    >>> parse_line("Unit  0- [  -1-   0] [SEQUENCE]")
+    ColumnUnit(unit=0, column=0, kind='SEQUENCE')
+    >>> parse_line("Unit  1- [  85-  86] [SEQUENCE]")
+    ColumnUnit(unit=1, column=86, kind='SEQUENCE')
+    """
+    match = _re_unit.match(line.rstrip())
+
+    if not match:
+        return
+
+    return ColumnUnit(int(match.group(1)), int(match.group(3)), match.group(4))
+
+
+def parse(iterable):
+    """Parse a file.
+
+    format
+
+    Column 0:
+    Unit  0- [  -1-   0] [SEQUENCE]
+    Unit  1- [  85-  86] [SEQUENCE]
+
+    means that seq1[0] == seq2[86] (0-based)
+    """
+    alignment = Alignment()
+    for line in iterable:
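+        # Echo each raw line when the WISE_PY_DEBUG environment variable
+        # is set to a non-empty value (purely for debugging).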
+        try:
+            if os.environ["WISE_PY_DEBUG"]:
+                print(line)
+        except KeyError:
+            pass
+
+        column_unit = parse_line(line)
+        if column_unit:
+            alignment.append(column_unit)
+
+    return alignment
+
+
+def align(pair, scores=None, gap_start=None, gap_extension=None, *args, **keywds):
+    """Align a pair of DNA files using Wise2 psw."""
+    cmdline = _CMDLINE_PSW[:]
+    if scores:
+        cmdline.extend((_OPTION_SCORES, scores))
+    if gap_start:
+        cmdline.extend((_OPTION_GAP_START, str(gap_start)))
+    if gap_extension:
+        cmdline.extend((_OPTION_GAP_EXTENSION, str(gap_extension)))
+    temp_file = Wise.align(cmdline, pair, *args, **keywds)
+    return parse(temp_file)
+
+
+def main():
+    """Command line implementation."""
+    print(align(sys.argv[1:3]))
+
+
+def _test(*args, **keywds):
+    import doctest
+
+    doctest.testmod(sys.modules[__name__], *args, **keywds)
+
+
+if __name__ == "__main__":
+    if __debug__:
+        _test()
+        """Initialize the class."""
+    main()
diff --git a/code/lib/Bio/__init__.py b/code/lib/Bio/__init__.py
new file mode 100644
index 0000000..3eaea3b
--- /dev/null
+++ b/code/lib/Bio/__init__.py
@@ -0,0 +1,129 @@
+# Copyright 1999-2003 by Jeffrey Chang.  All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Collection of modules for dealing with biological data in Python.
+
+The Biopython Project is an international association of developers
+of freely available Python tools for computational molecular biology.
+
+https://biopython.org
+"""
+
+import os
+import warnings
+
+__version__ = "1.79"
+
+
+class MissingExternalDependencyError(Exception):
+    """Missing an external dependency.
+
+    Used for things like missing command line tools. Important for our unit
+    tests to allow skipping tests with missing external dependencies.
+    """
+
+
+class MissingPythonDependencyError(MissingExternalDependencyError, ImportError):
+    """Missing an external python dependency (subclass of ImportError).
+
+    Used for missing Python modules (rather than just a typical ImportError).
+    Important for our unit tests to allow skipping tests with missing external
+    python dependencies, while also allowing the exception to be caught as an
+    ImportError.
+    """
+
+
+class StreamModeError(ValueError):
+    """Incorrect stream mode (text vs binary).
+
+    This error should be raised when a stream (file or file-like object)
+    argument is in text mode while the receiving function expects binary mode,
+    or vice versa.
+    """
+
+
+class BiopythonWarning(Warning):
+    """Biopython warning.
+
+    Biopython should use this warning (or subclasses of it), making it easy to
+    silence all our warning messages should you wish to:
+
+    >>> import warnings
+    >>> from Bio import BiopythonWarning
+    >>> warnings.simplefilter('ignore', BiopythonWarning)
+
+    Consult the warnings module documentation for more details.
+    """
+
+
+class BiopythonParserWarning(BiopythonWarning):
+    """Biopython parser warning.
+
+    Some invalid data files cannot be parsed and will trigger an exception.
+    Where a reasonable interpretation is possible, Biopython will issue this
+    warning to indicate a potential problem. To silence these warnings, use:
+
+    >>> import warnings
+    >>> from Bio import BiopythonParserWarning
+    >>> warnings.simplefilter('ignore', BiopythonParserWarning)
+
+    Consult the warnings module documentation for more details.
+    """
+
+
+class BiopythonDeprecationWarning(BiopythonWarning):
+    """Biopython deprecation warning.
+
+    Biopython uses this warning instead of the built-in DeprecationWarning
+    because the latter has been ignored by default since Python 2.7.
+
+    To silence all our deprecation warning messages, use:
+
+    >>> import warnings
+    >>> from Bio import BiopythonDeprecationWarning
+    >>> warnings.simplefilter('ignore', BiopythonDeprecationWarning)
+
+    Code marked as deprecated is likely to be removed in a future version
+    of Biopython. To avoid removal of this code, please contact the Biopython
+    developers via the mailing list or GitHub.
+    """
+
+
+class BiopythonExperimentalWarning(BiopythonWarning):
+    """Biopython experimental code warning.
+
+    Biopython uses this warning for experimental code ('alpha' or 'beta'
+    level code) which is released as part of the standard releases to mark
+    sub-modules or functions for early adopters to test & give feedback.
+
+    Code issuing this warning is likely to change (or even be removed) in
+    a subsequent release of Biopython. Such code should NOT be used for
+    production/stable code. It should only be used if:
+
+    - You are running the latest release of Biopython, or ideally the
+      latest code from our repository.
+    - You are subscribed to the biopython-dev mailing list to provide
+      feedback on this code, and to be alerted of changes to it.
+
+    If all goes well, experimental code would be promoted to stable in
+    a subsequent release, and this warning removed from it.
+    """
+
+
+_parent_dir = os.path.dirname(os.path.dirname(__file__))
+if os.path.exists(os.path.join(_parent_dir, "setup.py")):
+    warnings.warn(
+        "You may be importing Biopython from inside the source tree."
+        " This is bad practice and might lead to downstream issues."
+        " In particular, you might encounter ImportErrors due to"
+        " missing compiled C extensions. We recommend that you"
+        " try running your code from outside the source tree."
+        " If you are outside the source tree then you have a"
+        " setup.py file in an unexpected directory: " + _parent_dir,
+        BiopythonWarning,
+    )
+# See #PR 2007 and issue #1991 for discussion on this warning:
+# https://github.com/biopython/biopython/pull/2007
diff --git a/code/lib/Bio/__pycache__/File.cpython-37.pyc b/code/lib/Bio/__pycache__/File.cpython-37.pyc
new file mode 100644
index 0000000..8b175f4
Binary files /dev/null and b/code/lib/Bio/__pycache__/File.cpython-37.pyc differ
diff --git a/code/lib/Bio/__pycache__/LogisticRegression.cpython-37.pyc b/code/lib/Bio/__pycache__/LogisticRegression.cpython-37.pyc
new file mode 100644
index 0000000..32fa1c4
Binary files /dev/null and b/code/lib/Bio/__pycache__/LogisticRegression.cpython-37.pyc differ
diff --git a/code/lib/Bio/__pycache__/MarkovModel.cpython-37.pyc b/code/lib/Bio/__pycache__/MarkovModel.cpython-37.pyc
new file mode 100644
index 0000000..9666934
Binary files /dev/null and b/code/lib/Bio/__pycache__/MarkovModel.cpython-37.pyc differ
diff --git a/code/lib/Bio/__pycache__/MaxEntropy.cpython-37.pyc b/code/lib/Bio/__pycache__/MaxEntropy.cpython-37.pyc
new file mode 100644
index 0000000..fde5d1e
Binary files /dev/null and b/code/lib/Bio/__pycache__/MaxEntropy.cpython-37.pyc differ
diff --git a/code/lib/Bio/__pycache__/NaiveBayes.cpython-37.pyc b/code/lib/Bio/__pycache__/NaiveBayes.cpython-37.pyc
new file mode 100644
index 0000000..9a0323b
Binary files /dev/null and b/code/lib/Bio/__pycache__/NaiveBayes.cpython-37.pyc differ
diff --git a/code/lib/Bio/__pycache__/Seq.cpython-37.pyc b/code/lib/Bio/__pycache__/Seq.cpython-37.pyc
new file mode 100644
index 0000000..f5b6222
Binary files /dev/null and b/code/lib/Bio/__pycache__/Seq.cpython-37.pyc differ
diff --git a/code/lib/Bio/__pycache__/SeqFeature.cpython-37.pyc b/code/lib/Bio/__pycache__/SeqFeature.cpython-37.pyc
new file mode 100644
index 0000000..a003b0b
Binary files /dev/null and b/code/lib/Bio/__pycache__/SeqFeature.cpython-37.pyc differ
diff --git a/code/lib/Bio/__pycache__/SeqRecord.cpython-37.pyc b/code/lib/Bio/__pycache__/SeqRecord.cpython-37.pyc
new file mode 100644
index 0000000..a6ae8e3
Binary files /dev/null and b/code/lib/Bio/__pycache__/SeqRecord.cpython-37.pyc differ
diff --git a/code/lib/Bio/__pycache__/__init__.cpython-311.pyc b/code/lib/Bio/__pycache__/__init__.cpython-311.pyc
new file mode 100644
index 0000000..47befb8
Binary files /dev/null and b/code/lib/Bio/__pycache__/__init__.cpython-311.pyc differ
diff --git a/code/lib/Bio/__pycache__/__init__.cpython-312.pyc b/code/lib/Bio/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000..036b78d
Binary files /dev/null and b/code/lib/Bio/__pycache__/__init__.cpython-312.pyc differ
diff --git a/code/lib/Bio/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000..375f197
Binary files /dev/null and b/code/lib/Bio/__pycache__/__init__.cpython-37.pyc differ
diff --git a/code/lib/Bio/__pycache__/_utils.cpython-37.pyc b/code/lib/Bio/__pycache__/_utils.cpython-37.pyc
new file mode 100644
index 0000000..fc3fc9d
Binary files /dev/null and b/code/lib/Bio/__pycache__/_utils.cpython-37.pyc differ
diff --git a/code/lib/Bio/__pycache__/bgzf.cpython-37.pyc b/code/lib/Bio/__pycache__/bgzf.cpython-37.pyc
new file mode 100644
index 0000000..b27abef
Binary files /dev/null and b/code/lib/Bio/__pycache__/bgzf.cpython-37.pyc differ
diff --git a/code/lib/Bio/__pycache__/kNN.cpython-37.pyc b/code/lib/Bio/__pycache__/kNN.cpython-37.pyc
new file mode 100644
index 0000000..1727f0f
Binary files /dev/null and b/code/lib/Bio/__pycache__/kNN.cpython-37.pyc differ
diff --git a/code/lib/Bio/__pycache__/pairwise2.cpython-37.pyc b/code/lib/Bio/__pycache__/pairwise2.cpython-37.pyc
new file mode 100644
index 0000000..ce9eb30
Binary files /dev/null and b/code/lib/Bio/__pycache__/pairwise2.cpython-37.pyc differ
diff --git a/code/lib/Bio/_utils.py b/code/lib/Bio/_utils.py
new file mode 100644
index 0000000..872f4c1
--- /dev/null
+++ b/code/lib/Bio/_utils.py
@@ -0,0 +1,70 @@
+# Copyright 2010 by Eric Talevich. All rights reserved.
+# Copyright 2012 by Wibowo Arindrarto. All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Common utility functions for various Bio submodules."""
+
+
+import os
+
+
+def find_test_dir(start_dir=None):
+    """Find the absolute path of Biopython's Tests directory.
+
+    Arguments:
+    start_dir -- Initial directory to begin lookup (default to current dir)
+
+    If the directory is not found before reaching the filesystem's root
+    directory, a ValueError is raised.
+
+    """
+    if not start_dir:
+        # no callbacks in function signatures!
+        # defaults to the current directory
+        # (using __file__ would give the installed Biopython)
+        start_dir = "."
+
+    target = os.path.abspath(start_dir)
+    while True:
+        if os.path.isdir(os.path.join(target, "Bio")) and os.path.isdir(
+            os.path.join(target, "Tests")
+        ):
+            # Good, we're in the Biopython root now
+            return os.path.abspath(os.path.join(target, "Tests"))
+        # Recurse up the tree
+        # TODO - Test this on Windows
+        new, tmp = os.path.split(target)
+        if target == new:
+            # Reached root
+            break
+        target = new
+    raise ValueError(
+        "Not within Biopython source tree: %r" % os.path.abspath(start_dir)
+    )
+
+
+def run_doctest(target_dir=None, *args, **kwargs):
+    """Run doctest for the importing module."""
+    import doctest
+
+    # default doctest options
+    default_kwargs = {"optionflags": doctest.ELLIPSIS}
+    kwargs.update(default_kwargs)
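+    # Note that updating kwargs with the defaults last means the default
+    # optionflags override any caller-supplied value.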
+
+    cur_dir = os.path.abspath(os.curdir)
+
+    print("Running doctests...")
+    try:
+        os.chdir(find_test_dir(target_dir))
+        doctest.testmod(*args, **kwargs)
+    finally:
+        # and revert back to initial directory
+        os.chdir(cur_dir)
+    print("Done")
+
+
+if __name__ == "__main__":
+    run_doctest()
diff --git a/code/lib/Bio/bgzf.py b/code/lib/Bio/bgzf.py
new file mode 100644
index 0000000..614964a
--- /dev/null
+++ b/code/lib/Bio/bgzf.py
@@ -0,0 +1,920 @@
+#!/usr/bin/env python
+# Copyright 2010-2018 by Peter Cock.
+# All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+r"""Read and write BGZF compressed files (the GZIP variant used in BAM).
+
+The SAM/BAM file format (Sequence Alignment/Map) comes in a plain text
+format (SAM), and a compressed binary format (BAM). The latter uses a
+modified form of gzip compression called BGZF (Blocked GNU Zip Format),
+which can be applied to any file format to provide compression with
+efficient random access. BGZF is described together with the SAM/BAM
+file format at http://samtools.sourceforge.net/SAM1.pdf
+
+Please read the text below about 'virtual offsets' before using BGZF
+files for random access.
+
+Aim of this module
+------------------
+The Python gzip library can be used to read BGZF files, since for
+decompression they are just (specialised) gzip files. What this
+module aims to facilitate is random access to BGZF files (using the
+'virtual offset' idea), and writing BGZF files (which means using
+suitably sized gzip blocks and writing the extra 'BC' field in the
+gzip headers). As in the gzip library, the zlib library is used
+internally.
+
+In addition to being required for random access to and writing of
+BAM files, the BGZF format can also be used on other sequential
+data (in the sense of one record after another), such as most of
+the sequence data formats supported in Bio.SeqIO (like FASTA,
+FASTQ, GenBank, etc) or large MAF alignments.
+
+The Bio.SeqIO indexing functions use this module to support BGZF files.
+
+Technical Introduction to BGZF
+------------------------------
+The gzip file format allows multiple compressed blocks, each of which
+could be a stand alone gzip file. As an interesting bonus, this means
+you can use Unix ``cat`` to combine two or more gzip files into one by
+concatenating them. Also, each block can have one of several compression
+levels (including uncompressed, which actually takes up a little bit
+more space due to the gzip header).
+
+What the BAM designers realised was that while random access to data
+stored in traditional gzip files was slow, breaking the file into
+gzip blocks would allow fast random access to each block. To access
+a particular piece of the decompressed data, you just need to know
+which block it starts in (the offset of the gzip block start), and
+how far into the (decompressed) contents of the block you need to
+read.
+
+One problem with this is finding the gzip block sizes efficiently.
+You can do it with a standard gzip file, but it requires every block
+to be decompressed -- and that would be rather slow. Additionally
+typical gzip files may use very large blocks.
+
+All that differs in BGZF is that compressed size of each gzip block
+is limited to 2^16 bytes, and an extra 'BC' field in the gzip header
+records this size. Traditional decompression tools can ignore this,
+and unzip the file just like any other gzip file.
+
+The point of this is you can look at the first BGZF block, find out
+how big it is from this 'BC' header, and thus seek immediately to
+the second block, and so on.
+
+The BAM indexing scheme records read positions using a 64 bit
+'virtual offset', comprising ``coffset << 16 | uoffset``, where ``coffset``
+is the file offset of the BGZF block containing the start of the read
+(unsigned integer using up to 64-16 = 48 bits), and ``uoffset`` is the
+offset within the (decompressed) block (unsigned 16 bit integer).
+
+This limits you to BAM files where the last block starts within the
+first 2^48 bytes, or 256 petabytes, and the decompressed size of each
+block is at most 2^16 bytes, or 64kb. Note that this matches the BGZF
+'BC' field size which limits the compressed size of each block to
+2^16 bytes, allowing for BAM files to use BGZF with no gzip
+compression (useful for intermediate files in memory to reduce
+CPU load).
+
+Warning about namespaces
+------------------------
+It is considered a bad idea to use "from XXX import ``*``" in Python, because
+it pollutes the namespace. This is a real issue with Bio.bgzf (and the
+standard Python library gzip) because they contain a function called open.
+For example, suppose you do this:
+
+>>> from Bio.bgzf import *
+>>> print(open.__module__)
+Bio.bgzf
+
+Or,
+
+>>> from gzip import *
+>>> print(open.__module__)
+gzip
+
+Notice that the open function has been replaced. You can "fix" this if you
+need to by importing the built-in open function:
+
+>>> from builtins import open
+
+However, what we recommend instead is to use the explicit namespace, e.g.
+
+>>> from Bio import bgzf
+>>> print(bgzf.open.__module__)
+Bio.bgzf
+
+
+Examples
+--------
+This is an ordinary GenBank file compressed using BGZF, so it can
+be decompressed using gzip,
+
+>>> import gzip
+>>> handle = gzip.open("GenBank/NC_000932.gb.bgz", "r")
+>>> assert 0 == handle.tell()
+>>> line = handle.readline()
+>>> assert 80 == handle.tell()
+>>> line = handle.readline()
+>>> assert 143 == handle.tell()
+>>> data = handle.read(70000)
+>>> assert 70143 == handle.tell()
+>>> handle.close()
+
+We can also access the file using the BGZF reader - but pay
+attention to the file offsets which will be explained below:
+
+>>> handle = BgzfReader("GenBank/NC_000932.gb.bgz", "r")
+>>> assert 0 == handle.tell()
+>>> print(handle.readline().rstrip())
+LOCUS       NC_000932             154478 bp    DNA     circular PLN 15-APR-2009
+>>> assert 80 == handle.tell()
+>>> print(handle.readline().rstrip())
+DEFINITION  Arabidopsis thaliana chloroplast, complete genome.
+>>> assert 143 == handle.tell()
+>>> data = handle.read(70000)
+>>> assert 987828735 == handle.tell()
+>>> print(handle.readline().rstrip())
+f="GeneID:844718"
+>>> print(handle.readline().rstrip())
+     CDS             complement(join(84337..84771,85454..85843))
+>>> offset = handle.seek(make_virtual_offset(55074, 126))
+>>> print(handle.readline().rstrip())
+    68521 tatgtcattc gaaattgtat aaagacaact cctatttaat agagctattt gtgcaagtat
+>>> handle.close()
+
+Notice the handle's offset looks different as a BGZF file. This
+brings us to the key point about BGZF, which is the block structure:
+
+>>> handle = open("GenBank/NC_000932.gb.bgz", "rb")
+>>> for values in BgzfBlocks(handle):
+...     print("Raw start %i, raw length %i; data start %i, data length %i" % values)
+Raw start 0, raw length 15073; data start 0, data length 65536
+Raw start 15073, raw length 17857; data start 65536, data length 65536
+Raw start 32930, raw length 22144; data start 131072, data length 65536
+Raw start 55074, raw length 22230; data start 196608, data length 65536
+Raw start 77304, raw length 14939; data start 262144, data length 43478
+Raw start 92243, raw length 28; data start 305622, data length 0
+>>> handle.close()
+
+In this example the first four blocks are 'full' and each hold 65536
+bytes of uncompressed data. The fifth block isn't full and holds 43478
+bytes. Finally there is a special empty sixth block which takes 28 bytes
+on disk and serves as an 'end of file' (EOF) marker. If this is missing,
+it is possible your BGZF file is incomplete.
+
+By reading ahead 70,000 bytes we moved into the second BGZF block,
+and at that point the BGZF virtual offsets start to look different
+to a simple offset into the decompressed data as exposed by the gzip
+library.
+
+As an example, consider seeking to the decompressed position 196734.
+Since 196734 = 65536 + 65536 + 65536 + 126 = 65536*3 + 126, this
+is equivalent to jumping the first three blocks (which in this
+specific example are all size 65536 after decompression - which
+does not always hold) and starting at byte 126 of the fourth block
+(after decompression). For BGZF, we need to know the fourth block's
+offset of 55074 and the offset within the block of 126 to get the
+BGZF virtual offset.
+
+>>> print(55074 << 16 | 126)
+3609329790
+>>> print(bgzf.make_virtual_offset(55074, 126))
+3609329790
+
+Thus for this BGZF file, decompressed position 196734 corresponds
+to the virtual offset 3609329790. However, another BGZF file with
+different contents would have compressed more or less efficiently,
+so the compressed blocks would be different sizes. What this means
+is the mapping between the uncompressed offset and the compressed
+virtual offset depends on the BGZF file you are using.
+
+If you are accessing a BGZF file via this module, just use the
+handle.tell() method to note the virtual offset of a position you
+may later want to return to using handle.seek().
+
+The catch with BGZF virtual offsets is while they can be compared
+(which offset comes first in the file), you cannot safely subtract
+them to get the size of the data between them, nor add/subtract
+a relative offset.
+
+Of course you can parse this file with Bio.SeqIO using BgzfReader,
+although there isn't any benefit over using gzip.open(...), unless
+you want to index BGZF compressed sequence files:
+
+>>> from Bio import SeqIO
+>>> handle = BgzfReader("GenBank/NC_000932.gb.bgz")
+>>> record = SeqIO.read(handle, "genbank")
+>>> handle.close()
+>>> print(record.id)
+NC_000932.1
+
+Text Mode
+---------
+
+Like the standard library gzip.open(...), the BGZF code defaults to opening
+files in binary mode.
+
+You can request the file be opened in text mode, but beware that this is hard
+coded to the simple "latin1" (aka "iso-8859-1") encoding (which includes all
+the ASCII characters), which works well with most Western European languages.
+However, it is not fully compatible with the more widely used UTF-8 encoding.
+
+In variable width encodings like UTF-8, some single characters in the unicode
+text output are represented by multiple bytes in the raw binary form. This is
+problematic with BGZF, as we cannot always decode each block in isolation - a
+single unicode character could be split over two blocks. This can even happen
+with fixed width unicode encodings, as the BGZF block size is not fixed.
+
+Therefore, this module is currently restricted to only support single byte
+unicode encodings, such as ASCII, "latin1" (which is a superset of ASCII), or
+potentially other character maps (not implemented).
+
+Furthermore, unlike the default text mode on Python 3, we do not attempt to
+implement universal new line mode. This transforms the various operating system
+new line conventions like Windows (CR LF or "\r\n"), Unix (just LF, "\n"), or
+old Macs (just CR, "\r"), into just LF ("\n"). Here we have the same problem -
+is "\r" at the end of a block an incomplete Windows style new line?
+
+Instead, you will get the CR ("\r") and LF ("\n") characters as is.
+
+If your data is in UTF-8 or any other incompatible encoding, you must use
+binary mode, and decode the appropriate fragments yourself.
+"""
+
+import struct
+import sys
+import zlib
+
+from builtins import open as _open
+
+_bgzf_magic = b"\x1f\x8b\x08\x04"
+_bgzf_header = b"\x1f\x8b\x08\x04\x00\x00\x00\x00\x00\xff\x06\x00\x42\x43\x02\x00"
+_bgzf_eof = b"\x1f\x8b\x08\x04\x00\x00\x00\x00\x00\xff\x06\x00BC\x02\x00\x1b\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00"
+_bytes_BC = b"BC"
+
+
+def open(filename, mode="rb"):
+    r"""Open a BGZF file for reading, writing or appending.
+
+    If text mode is requested, in order to avoid multi-byte characters, this is
+    hard coded to use the "latin1" encoding, and "\r" and "\n" are passed as is
+    (without implementing universal new line mode).
+
+    If your data is in UTF-8 or any other incompatible encoding, you must use
+    binary mode, and decode the appropriate fragments yourself.
+    """
+    if "r" in mode.lower():
+        return BgzfReader(filename, mode)
+    elif "w" in mode.lower() or "a" in mode.lower():
+        return BgzfWriter(filename, mode)
+    else:
+        raise ValueError("Bad mode %r" % mode)
+
+
+def make_virtual_offset(block_start_offset, within_block_offset):
+    """Compute a BGZF virtual offset from block start and within block offsets.
+
+    The BAM indexing scheme records read positions using a 64 bit
+    'virtual offset', comprising in C terms:
+
+    block_start_offset << 16 | within_block_offset
+
+    Here block_start_offset is the file offset of the BGZF block
+    start (unsigned integer using up to 64-16 = 48 bits), and
+    within_block_offset within the (decompressed) block (unsigned
+    16 bit integer).
+
+    >>> make_virtual_offset(0, 0)
+    0
+    >>> make_virtual_offset(0, 1)
+    1
+    >>> make_virtual_offset(0, 2**16 - 1)
+    65535
+    >>> make_virtual_offset(0, 2**16)
+    Traceback (most recent call last):
+    ...
+    ValueError: Require 0 <= within_block_offset < 2**16, got 65536
+
+    >>> 65536 == make_virtual_offset(1, 0)
+    True
+    >>> 65537 == make_virtual_offset(1, 1)
+    True
+    >>> 131071 == make_virtual_offset(1, 2**16 - 1)
+    True
+
+    >>> 6553600000 == make_virtual_offset(100000, 0)
+    True
+    >>> 6553600001 == make_virtual_offset(100000, 1)
+    True
+    >>> 6553600010 == make_virtual_offset(100000, 10)
+    True
+
+    >>> make_virtual_offset(2**48, 0)
+    Traceback (most recent call last):
+    ...
+    ValueError: Require 0 <= block_start_offset < 2**48, got 281474976710656
+
+    """
+    if within_block_offset < 0 or within_block_offset >= 65536:
+        raise ValueError(
+            "Require 0 <= within_block_offset < 2**16, got %i" % within_block_offset
+        )
+    if block_start_offset < 0 or block_start_offset >= 281474976710656:
+        raise ValueError(
+            "Require 0 <= block_start_offset < 2**48, got %i" % block_start_offset
+        )
+    return (block_start_offset << 16) | within_block_offset
+
+
+def split_virtual_offset(virtual_offset):
+    """Divides a 64-bit BGZF virtual offset into block start & within block offsets.
+
+    >>> (100000, 0) == split_virtual_offset(6553600000)
+    True
+    >>> (100000, 10) == split_virtual_offset(6553600010)
+    True
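+
+    And a virtual offset round-trips through make_virtual_offset (this
+    follows directly from the two definitions):
+
+    >>> split_virtual_offset(make_virtual_offset(123456, 42)) == (123456, 42)
+    True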
+
+    """
+    start = virtual_offset >> 16
+    return start, virtual_offset ^ (start << 16)
+
+
+def BgzfBlocks(handle):
+    """Low level debugging function to inspect BGZF blocks.
+
+    Expects a BGZF compressed file opened in binary read mode using
+    the builtin open function. Do not use a handle from this bgzf
+    module or the gzip module's open function which will decompress
+    the file.
+
+    Returns the block start offset (see virtual offsets), the block
+    length (add these for the start of the next block), and the
+    decompressed length of the blocks contents (limited to 65536 in
+    BGZF), as an iterator - one tuple per BGZF block.
+
+    >>> from builtins import open
+    >>> handle = open("SamBam/ex1.bam", "rb")
+    >>> for values in BgzfBlocks(handle):
+    ...     print("Raw start %i, raw length %i; data start %i, data length %i" % values)
+    Raw start 0, raw length 18239; data start 0, data length 65536
+    Raw start 18239, raw length 18223; data start 65536, data length 65536
+    Raw start 36462, raw length 18017; data start 131072, data length 65536
+    Raw start 54479, raw length 17342; data start 196608, data length 65536
+    Raw start 71821, raw length 17715; data start 262144, data length 65536
+    Raw start 89536, raw length 17728; data start 327680, data length 65536
+    Raw start 107264, raw length 17292; data start 393216, data length 63398
+    Raw start 124556, raw length 28; data start 456614, data length 0
+    >>> handle.close()
+
+    Indirectly we can tell this file came from an old version of
+    samtools because all the blocks (except the final one and the
+    dummy empty EOF marker block) are 65536 bytes.  Later versions
+    avoid splitting a read between two blocks, and give the header
+    its own block (useful to speed up replacing the header). You
+    can see this in ex1_refresh.bam created using samtools 0.1.18:
+
+    samtools view -b ex1.bam > ex1_refresh.bam
+
+    >>> handle = open("SamBam/ex1_refresh.bam", "rb")
+    >>> for values in BgzfBlocks(handle):
+    ...     print("Raw start %i, raw length %i; data start %i, data length %i" % values)
+    Raw start 0, raw length 53; data start 0, data length 38
+    Raw start 53, raw length 18195; data start 38, data length 65434
+    Raw start 18248, raw length 18190; data start 65472, data length 65409
+    Raw start 36438, raw length 18004; data start 130881, data length 65483
+    Raw start 54442, raw length 17353; data start 196364, data length 65519
+    Raw start 71795, raw length 17708; data start 261883, data length 65411
+    Raw start 89503, raw length 17709; data start 327294, data length 65466
+    Raw start 107212, raw length 17390; data start 392760, data length 63854
+    Raw start 124602, raw length 28; data start 456614, data length 0
+    >>> handle.close()
+
+    The above example has no embedded SAM header (thus the first block
+    is very small at just 38 bytes of decompressed data), while the next
+    example does (a larger block of 103 bytes). Notice that the rest of
+    the blocks show the same sizes (they contain the same read data):
+
+    >>> handle = open("SamBam/ex1_header.bam", "rb")
+    >>> for values in BgzfBlocks(handle):
+    ...     print("Raw start %i, raw length %i; data start %i, data length %i" % values)
+    Raw start 0, raw length 104; data start 0, data length 103
+    Raw start 104, raw length 18195; data start 103, data length 65434
+    Raw start 18299, raw length 18190; data start 65537, data length 65409
+    Raw start 36489, raw length 18004; data start 130946, data length 65483
+    Raw start 54493, raw length 17353; data start 196429, data length 65519
+    Raw start 71846, raw length 17708; data start 261948, data length 65411
+    Raw start 89554, raw length 17709; data start 327359, data length 65466
+    Raw start 107263, raw length 17390; data start 392825, data length 63854
+    Raw start 124653, raw length 28; data start 456679, data length 0
+    >>> handle.close()
+
+    """
+    if isinstance(handle, BgzfReader):
+        raise TypeError("Function BgzfBlocks expects a binary handle")
+    data_start = 0
+    while True:
+        start_offset = handle.tell()
+        try:
+            block_length, data = _load_bgzf_block(handle)
+        except StopIteration:
+            break
+        data_len = len(data)
+        yield start_offset, block_length, data_start, data_len
+        data_start += data_len
+
+
+def _load_bgzf_block(handle, text_mode=False):
+    """Load the next BGZF block of compressed data (PRIVATE).
+
+    Returns a tuple (block size and data), or at end of file
+    will raise StopIteration.
+    """
+    magic = handle.read(4)
+    if not magic:
+        # End of file - should we signal this differently now?
+        # See https://www.python.org/dev/peps/pep-0479/
+        raise StopIteration
+    if magic != _bgzf_magic:
+        raise ValueError(
+            r"A BGZF (e.g. a BAM file) block should start with "
+            r"%r, not %r; handle.tell() now says %r"
+            % (_bgzf_magic, magic, handle.tell())
+        )
+    gzip_mod_time, gzip_extra_flags, gzip_os, extra_len = struct.unpack(
+        "<LBBH", handle.read(8)
+    )
+
+    block_size = None
+    x_len = 0
+    while x_len < extra_len:
+        subfield_id = handle.read(2)
+        subfield_len = struct.unpack("<H", handle.read(2))[0]  # uint16_t
+        subfield_data = handle.read(subfield_len)
+        x_len += subfield_len + 4
+        if subfield_id == _bytes_BC:
+            assert subfield_len == 2, "Wrong BC payload length"
+            assert block_size is None, "Two BC subfields?"
+            block_size = struct.unpack("<H", subfield_data)[0] + 1  # uint16_t
+    assert x_len == extra_len, (x_len, extra_len)
+    assert block_size is not None, "Missing BC, this isn't a BGZF file!"
+    # Now comes the compressed data, CRC, and length of the uncompressed data.
+    deflate_size = block_size - 1 - extra_len - 19
+    d = zlib.decompressobj(-15)  # Negative window size means no headers
+    data = d.decompress(handle.read(deflate_size)) + d.flush()
+    expected_crc = handle.read(4)
+    expected_size = struct.unpack("<I", handle.read(4))[0]
+    if expected_size != len(data):
+        raise RuntimeError("Decompressed to %i, not %i" % (len(data), expected_size))
+    # Should cope with a mix of Python platforms...
+    crc = zlib.crc32(data)
+    if crc < 0:
+        crc = struct.pack("<i", crc)
+    else:
+        crc = struct.pack("<I", crc)
+    if expected_crc != crc:
+        raise RuntimeError("CRC is %s, not %s" % (crc, expected_crc))
+    if text_mode:
+        # Note ISO-8859-1 aka latin-1 preserves the first 256 characters
+        # (i.e. ASCII) and, critically, is a single byte encoding.
+        return block_size, data.decode("latin-1")
+    else:
+        return block_size, data
+
+
+class BgzfReader:
+    r"""BGZF reader, acts like a read only handle but seek/tell differ.
+
+    Let's use the BgzfBlocks function to have a peek at the BGZF blocks
+    in an example BAM file,
+
+    >>> from builtins import open
+    >>> handle = open("SamBam/ex1.bam", "rb")
+    >>> for values in BgzfBlocks(handle):
+    ...     print("Raw start %i, raw length %i; data start %i, data length %i" % values)
+    Raw start 0, raw length 18239; data start 0, data length 65536
+    Raw start 18239, raw length 18223; data start 65536, data length 65536
+    Raw start 36462, raw length 18017; data start 131072, data length 65536
+    Raw start 54479, raw length 17342; data start 196608, data length 65536
+    Raw start 71821, raw length 17715; data start 262144, data length 65536
+    Raw start 89536, raw length 17728; data start 327680, data length 65536
+    Raw start 107264, raw length 17292; data start 393216, data length 63398
+    Raw start 124556, raw length 28; data start 456614, data length 0
+    >>> handle.close()
+
+    Now let's see how to use this block information to jump to
+    specific parts of the decompressed BAM file:
+
+    >>> handle = BgzfReader("SamBam/ex1.bam", "rb")
+    >>> assert 0 == handle.tell()
+    >>> magic = handle.read(4)
+    >>> assert 4 == handle.tell()
+
+    So far nothing so strange, we got the magic marker used at the
+    start of a decompressed BAM file, and the handle position makes
+    sense. Now however, let's jump to the end of this block and 4
+    bytes into the next block by reading 65536 bytes,
+
+    >>> data = handle.read(65536)
+    >>> len(data)
+    65536
+    >>> assert 1195311108 == handle.tell()
+
+    Expecting 4 + 65536 = 65540 were you? Well this is a BGZF 64-bit
+    virtual offset, which means:
+
+    >>> split_virtual_offset(1195311108)
+    (18239, 4)
+
+    You should spot 18239 as the start of the second BGZF block, while
+    the 4 is the offset into this block. See also make_virtual_offset,
+
+    >>> make_virtual_offset(18239, 4)
+    1195311108
+
+    Let's jump back to almost the start of the file,
+
+    >>> make_virtual_offset(0, 2)
+    2
+    >>> handle.seek(2)
+    2
+    >>> handle.close()
+
+    Note that you can use the max_cache argument to limit the number of
+    BGZF blocks cached in memory. The default is 100, and since each
+    block can be up to 64kb, the default cache could take up to 6MB of
+    RAM. The cache is not important for reading through the file in one
+    pass, but is important for improving performance of random access.
+    """
+
+    def __init__(self, filename=None, mode="r", fileobj=None, max_cache=100):
+        """Initialize the class."""
+        # TODO - Assuming we can seek, check for 28 bytes EOF empty block
+        # and if missing warn about possible truncation (as in samtools)?
+        if max_cache < 1:
+            raise ValueError("Use max_cache with a minimum of 1")
+        # Must open the BGZF file in binary mode, but we may want to
+        # treat the contents as either text or binary (unicode or
+        # bytes under Python 3)
+        if fileobj:
+            assert filename is None
+            handle = fileobj
+            assert "b" in handle.mode.lower()
+        else:
+            if "w" in mode.lower() or "a" in mode.lower():
+                raise ValueError(
+                    "Must use read mode (default), not write or append mode"
+                )
+            handle = _open(filename, "rb")
+        self._text = "b" not in mode.lower()
+        if self._text:
+            self._newline = "\n"
+        else:
+            self._newline = b"\n"
+        self._handle = handle
+        self.max_cache = max_cache
+        self._buffers = {}
+        self._block_start_offset = None
+        self._block_raw_length = None
+        self._load_block(handle.tell())
+
+    def _load_block(self, start_offset=None):
+        if start_offset is None:
+            # If the file is being read sequentially, then _handle.tell()
+            # should be pointing at the start of the next block.
+            # However, if seek has been used, we can't assume that.
+            start_offset = self._block_start_offset + self._block_raw_length
+        if start_offset == self._block_start_offset:
+            self._within_block_offset = 0
+            return
+        elif start_offset in self._buffers:
+            # Already in cache
+            self._buffer, self._block_raw_length = self._buffers[start_offset]
+            self._within_block_offset = 0
+            self._block_start_offset = start_offset
+            return
+        # Must hit the disk... first check cache limits,
+        while len(self._buffers) >= self.max_cache:
+            # TODO - Implement LRU cache removal?
+            self._buffers.popitem()
+        # Now load the block
+        handle = self._handle
+        if start_offset is not None:
+            handle.seek(start_offset)
+        self._block_start_offset = handle.tell()
+        try:
+            block_size, self._buffer = _load_bgzf_block(handle, self._text)
+        except StopIteration:
+            # EOF
+            block_size = 0
+            if self._text:
+                self._buffer = ""
+            else:
+                self._buffer = b""
+        self._within_block_offset = 0
+        self._block_raw_length = block_size
+        # Finally save the block in our cache,
+        self._buffers[self._block_start_offset] = self._buffer, block_size
+
+    def tell(self):
+        """Return a 64-bit unsigned BGZF virtual offset."""
+        if 0 < self._within_block_offset and self._within_block_offset == len(
+            self._buffer
+        ):
+            # Special case where we're right at the end of a (non empty) block.
+            # For non-maximal blocks could give two possible virtual offsets,
+            # but for a maximal block can't use 65536 as the within block
+            # offset. Therefore for consistency, use the next block and a
+            # within block offset of zero.
+            return (self._block_start_offset + self._block_raw_length) << 16
+        else:
+            # return make_virtual_offset(self._block_start_offset,
+            #                           self._within_block_offset)
+            # TODO - Include bounds checking as in make_virtual_offset?
+            return (self._block_start_offset << 16) | self._within_block_offset
+
+    def seek(self, virtual_offset):
+        """Seek to a 64-bit unsigned BGZF virtual offset."""
+        # Do this inline to avoid a function call,
+        # start_offset, within_block = split_virtual_offset(virtual_offset)
+        start_offset = virtual_offset >> 16
+        within_block = virtual_offset ^ (start_offset << 16)
+        if start_offset != self._block_start_offset:
+            # Don't need to load the block if already there
+            # (this avoids a function call since _load_block would do nothing)
+            self._load_block(start_offset)
+            assert start_offset == self._block_start_offset
+        if within_block > len(self._buffer):
+            if not (within_block == 0 and len(self._buffer) == 0):
+                raise ValueError(
+                    "Within offset %i but block size only %i"
+                    % (within_block, len(self._buffer))
+                )
+        self._within_block_offset = within_block
+        # assert virtual_offset == self.tell(), \
+        #    "Did seek to %i (%i, %i), but tell says %i (%i, %i)" \
+        #    % (virtual_offset, start_offset, within_block,
+        #       self.tell(), self._block_start_offset,
+        #       self._within_block_offset)
+        return virtual_offset
+
+    def read(self, size=-1):
+        """Read method for the BGZF module."""
+        if size < 0:
+            raise NotImplementedError("Don't be greedy, that could be massive!")
+
+        result = "" if self._text else b""
+        while size and self._buffer:
+            if self._within_block_offset + size <= len(self._buffer):
+                # This may leave us right at the end of a block
+                # (lazy loading, don't load the next block unless we have to)
+                data = self._buffer[
+                    self._within_block_offset : self._within_block_offset + size
+                ]
+                self._within_block_offset += size
+                assert data  # Must be at least 1 byte
+                result += data
+                break
+            else:
+                data = self._buffer[self._within_block_offset :]
+                size -= len(data)
+                self._load_block()  # will reset offsets
+                # TODO - Test with corner case of an empty block followed by
+                # a non-empty block
+                result += data
+
+        return result
+
+    def readline(self):
+        """Read a single line for the BGZF file."""
+        result = "" if self._text else b""
+        while self._buffer:
+            i = self._buffer.find(self._newline, self._within_block_offset)
+            # Three cases to consider,
+            if i == -1:
+                # No newline, need to read in more data
+                data = self._buffer[self._within_block_offset :]
+                self._load_block()  # will reset offsets
+                result += data
+            elif i + 1 == len(self._buffer):
+                # Found new line, but right at end of block (SPECIAL)
+                data = self._buffer[self._within_block_offset :]
+                # Must now load the next block to ensure tell() works
+                self._load_block()  # will reset offsets
+                assert data
+                result += data
+                break
+            else:
+                # Found new line, not at end of block (easy case, no IO)
+                data = self._buffer[self._within_block_offset : i + 1]
+                self._within_block_offset = i + 1
+                # assert data.endswith(self._newline)
+                result += data
+                break
+
+        return result
+
+    def __next__(self):
+        """Return the next line."""
+        line = self.readline()
+        if not line:
+            raise StopIteration
+        return line
+
+    def __iter__(self):
+        """Iterate over the lines in the BGZF file."""
+        return self
+
+    def close(self):
+        """Close BGZF file."""
+        self._handle.close()
+        self._buffer = None
+        self._block_start_offset = None
+        self._buffers = None
+
+    def seekable(self):
+        """Return True indicating the BGZF supports random access."""
+        return True
+
+    def isatty(self):
+        """Return True if connected to a TTY device."""
+        return False
+
+    def fileno(self):
+        """Return integer file descriptor."""
+        return self._handle.fileno()
+
+    def __enter__(self):
+        """Open a file operable with WITH statement."""
+        return self
+
+    def __exit__(self, type, value, traceback):
+        """Close a file with WITH statement."""
+        self.close()
+
+
+class BgzfWriter:
+    """Define a BGZFWriter object."""
+
+    def __init__(self, filename=None, mode="w", fileobj=None, compresslevel=6):
+        """Initilize the class."""
+        if fileobj:
+            assert filename is None
+            handle = fileobj
+        else:
+            if "w" not in mode.lower() and "a" not in mode.lower():
+                raise ValueError("Must use write or append mode, not %r" % mode)
+            if "a" in mode.lower():
+                raise NotImplementedError("Append mode is not implemented yet")
+                # handle = _open(filename, "ab")
+            else:
+                handle = _open(filename, "wb")
+        self._text = "b" not in mode.lower()
+        self._handle = handle
+        self._buffer = b""
+        self.compresslevel = compresslevel
+
+    def _write_block(self, block):
+        """Write provided data to file as a single BGZF compressed block (PRIVATE)."""
+        # print("Saving %i bytes" % len(block))
+        start_offset = self._handle.tell()
+        assert len(block) <= 65536
+        # Giving a negative window bits means no gzip/zlib headers,
+        # -15 used in samtools
+        c = zlib.compressobj(
+            self.compresslevel, zlib.DEFLATED, -15, zlib.DEF_MEM_LEVEL, 0
+        )
+        compressed = c.compress(block) + c.flush()
+        del c
+        if len(compressed) > 65536:
+            raise RuntimeError(
+                "TODO - Didn't compress enough, try less data in this block"
+            )
+        crc = zlib.crc32(block)
+        # Should cope with a mix of Python platforms...
+        if crc < 0:
+            crc = struct.pack("= 65536:
+                self._write_block(self._buffer[:65536])
+                self._buffer = self._buffer[65536:]
+
+    def flush(self):
+        """Flush data explicitally."""
+        while len(self._buffer) >= 65536:
+            self._write_block(self._buffer[:65535])
+            self._buffer = self._buffer[65535:]
+        self._write_block(self._buffer)
+        self._buffer = b""
+        self._handle.flush()
+
+    def close(self):
+        """Flush data, write 28 bytes BGZF EOF marker, and close BGZF file.
+
+        samtools will look for a magic EOF marker, just a 28 byte empty BGZF
+        block, and if it is missing warns the BAM file may be truncated. In
+        addition to samtools writing this block, so too does bgzip - so this
+        implementation does too.
+        """
+        if self._buffer:
+            self.flush()
+        self._handle.write(_bgzf_eof)
+        self._handle.flush()
+        self._handle.close()
+
+    def tell(self):
+        """Return a BGZF 64-bit virtual offset."""
+        return make_virtual_offset(self._handle.tell(), len(self._buffer))
+
+    def seekable(self):
+        """Return True indicating the BGZF supports random access."""
+        # Not seekable, but we do support tell...
+        return False
+
+    def isatty(self):
+        """Return True if connected to a TTY device."""
+        return False
+
+    def fileno(self):
+        """Return integer file descriptor."""
+        return self._handle.fileno()
+
+    def __enter__(self):
+        """Open a file operable with WITH statement."""
+        return self
+
+    def __exit__(self, type, value, traceback):
+        """Close a file with WITH statement."""
+        self.close()
+
+
+if __name__ == "__main__":
+    if len(sys.argv) > 1:
+        print("Call this with no arguments and pipe uncompressed data in on stdin")
+        print("and it will produce BGZF compressed data on stdout. e.g.")
+        print("")
+        print("./bgzf.py < example.fastq > example.fastq.bgz")
+        print("")
+        print("The extension convention of *.bgz is to distinugish these from *.gz")
+        print("used for standard gzipped files without the block structure of BGZF.")
+        print("You can use the standard gunzip command to decompress BGZF files,")
+        print("if it complains about the extension try something like this:")
+        print("")
+        print("cat example.fastq.bgz | gunzip > example.fastq")
+        print("")
+        print("See also the tool bgzip that comes with samtools")
+        sys.exit(0)
+
+    # Ensure we have binary mode handles
+    # (leave stderr as default text mode)
+    stdin = sys.stdin.buffer
+    stdout = sys.stdout.buffer
+
+    sys.stderr.write("Producing BGZF output from stdin...\n")
+    w = BgzfWriter(fileobj=stdout)
+    while True:
+        data = stdin.read(65536)
+        w.write(data)
+        if not data:
+            break
+    # Doing close will write an empty BGZF block as EOF marker:
+    w.close()
+    sys.stderr.write("BGZF data produced\n")
diff --git a/code/lib/Bio/codonalign/__init__.py b/code/lib/Bio/codonalign/__init__.py
new file mode 100644
index 0000000..7ed1700
--- /dev/null
+++ b/code/lib/Bio/codonalign/__init__.py
@@ -0,0 +1,810 @@
+# Copyright 2013 by Zheng Ruan (zruan1991@gmail.com).
+# All rights reserved.
+# This code is part of the Biopython distribution and governed by its
+# license.  Please see the LICENSE file that should have been included
+# as part of this package.
+
+"""Code for dealing with Codon Alignments."""
+
+import copy
+from collections.abc import Mapping, Iterable
+
+from Bio import BiopythonWarning
+from Bio import BiopythonExperimentalWarning
+
+from Bio.SeqRecord import SeqRecord
+from Bio.Data import CodonTable
+
+from Bio.codonalign.codonseq import CodonSeq
+from Bio.codonalign.codonalignment import CodonAlignment, mktest
+
+import warnings
+
+warnings.warn(
+    "Bio.codonalign is an experimental module which may undergo "
+    "significant changes prior to its future official release.",
+    BiopythonExperimentalWarning,
+)
+
+
+def build(
+    pro_align,
+    nucl_seqs,
+    corr_dict=None,
+    gap_char="-",
+    unknown="X",
+    codon_table=None,
+    complete_protein=False,
+    anchor_len=10,
+    max_score=10,
+):
+    """Build a codon alignment from protein alignment and corresponding nucleotides.
+
+    Arguments:
+     - pro_align  - a protein MultipleSeqAlignment object
+     - nucl_seqs - an object returned by SeqIO.parse or SeqIO.index
+       or a collection of SeqRecord.
+     - corr_dict  - a dict that maps protein id to nucleotide id
+     - complete_protein - whether the sequence begins with a start
+       codon
+
+    Return a CodonAlignment object.
+
+    The example below answers this Biostars question: https://www.biostars.org/p/89741/
+
+    >>> from Bio.Seq import Seq
+    >>> from Bio.SeqRecord import SeqRecord
+    >>> from Bio.Align import MultipleSeqAlignment
+    >>> from Bio.codonalign import build
+    >>> seq1 = SeqRecord(Seq('ATGTCTCGT'), id='pro1')
+    >>> seq2 = SeqRecord(Seq('ATGCGT'), id='pro2')
+    >>> pro1 = SeqRecord(Seq('MSR'), id='pro1')
+    >>> pro2 = SeqRecord(Seq('M-R'), id='pro2')
+    >>> aln = MultipleSeqAlignment([pro1, pro2])
+    >>> codon_aln = build(aln, [seq1, seq2])
+    >>> print(codon_aln)
+    CodonAlignment with 2 rows and 9 columns (3 codons)
+    ATGTCTCGT pro1
+    ATG---CGT pro2
+
+    """
+    # TODO
+    # add an option to allow the user to specify the returned object?
+
+    from Bio.Align import MultipleSeqAlignment
+
+    # check the type of object of pro_align
+    if not isinstance(pro_align, MultipleSeqAlignment):
+        raise TypeError("the first argument should be a MultipleSeqAlignment object")
+    # check whether the number of seqs in pro_align and nucl_seqs is
+    # the same
+    pro_num = len(pro_align)
+    if corr_dict is None:
+        try:
+            nucl_num = len(nucl_seqs)
+        except TypeError:
+            # nucl_seqs will be an iterator if returned by SeqIO.parse()
+            nucl_seqs = tuple(nucl_seqs)
+            nucl_num = len(nucl_seqs)
+        if pro_num > nucl_num:
+            raise ValueError(
+                f"Higher Number of SeqRecords in Protein Alignment ({pro_num}) "
+                f"than the Number of Nucleotide SeqRecords ({nucl_num}) are found!"
+            )
+
+        # Determine the protein sequences and nucl sequences
+        # correspondence. If nucl_seqs is a list, tuple or read by
+        # SeqIO.parse(), we assume the order of sequences in pro_align
+        # and nucl_seqs are the same. If nucl_seqs is a dict or read by
+        # SeqIO.index(), we match seqs in pro_align and those in
+        # nucl_seq by their id.
+        if isinstance(nucl_seqs, Mapping):
+            corr_method = 1
+        elif isinstance(nucl_seqs, Iterable):
+            corr_method = 0
+        else:
+            raise TypeError(
+                "Nucl Sequences Error, Unknown type to assign correspondence method"
+            )
+    else:
+        if not isinstance(corr_dict, dict):
+            raise TypeError(
+                "corr_dict should be a dict that corresponds "
+                "protein id to nucleotide id!"
+            )
+        if len(corr_dict) >= pro_num:
+            if isinstance(nucl_seqs, Mapping):
+                pass
+            else:
+                d = {}
+                for record in nucl_seqs:
+                    key = record.id
+                    if key in d:
+                        raise ValueError("Duplicate key '%s'" % key)
+                    d[key] = record
+                nucl_seqs = d
+            corr_method = 2
+        else:
+            raise RuntimeError(
+                f"Number of items in corr_dict ({len(corr_dict)}) "
+                f"is less than number of protein records ({pro_num})"
+            )
+
+    # set up pro-nucl correspondence based on corr_method
+    # corr_method = 0, consecutive pairing
+    if corr_method == 0:
+        pro_nucl_pair = zip(pro_align, nucl_seqs)
+    # corr_method = 1, keyword pairing
+    elif corr_method == 1:
+        nucl_id = set(nucl_seqs.keys())
+        pro_id = {i.id for i in pro_align}
+        # check if there is pro_id that does not have a nucleotide match
+        if pro_id - nucl_id:
+            diff = pro_id - nucl_id
+            raise ValueError(
+                f"Protein Record {', '.join(diff)} cannot find a "
+                "nucleotide sequence match, please check the id"
+            )
+        else:
+            pro_nucl_pair = []
+            for pro_rec in pro_align:
+                pro_nucl_pair.append((pro_rec, nucl_seqs[pro_rec.id]))
+    # corr_method = 2, dict pairing
+    elif corr_method == 2:
+        pro_nucl_pair = []
+        for pro_rec in pro_align:
+            try:
+                nucl_id = corr_dict[pro_rec.id]
+            except KeyError:
+                # Raise rather than print-and-exit: library code should not
+                # terminate the interpreter.
+                raise ValueError(
+                    "Protein record (%s) is not in corr_dict!" % pro_rec.id
+                ) from None
+            pro_nucl_pair.append((pro_rec, nucl_seqs[nucl_id]))
+
+    if codon_table is None:
+        codon_table = CodonTable.generic_by_id[1]
+
+    codon_aln = []
+    shift = False
+    for pair in pro_nucl_pair:
+        # Beware that the following span corresponds to an ungapped
+        # nucleotide sequence.
+        corr_span = _check_corr(
+            pair[0],
+            pair[1],
+            gap_char=gap_char,
+            codon_table=codon_table,
+            complete_protein=complete_protein,
+            anchor_len=anchor_len,
+        )
+        if not corr_span:
+            raise ValueError(
+                f"Protein Record {pair[0].id} and "
+                f"Nucleotide Record {pair[1].id} do not match!"
+            )
+        else:
+            codon_rec = _get_codon_rec(
+                pair[0],
+                pair[1],
+                corr_span,
+                gap_char=gap_char,
+                complete_protein=complete_protein,
+                codon_table=codon_table,
+                max_score=max_score,
+            )
+            codon_aln.append(codon_rec)
+            if corr_span[1] == 2:
+                shift = True
+    if shift:
+        return CodonAlignment(_align_shift_recs(codon_aln))
+    else:
+        return CodonAlignment(codon_aln)
+
+
+def _codons2re(codons):
+    """Generate regular expression based on a given list of codons (PRIVATE)."""
+    reg = ""
+    for i in zip(*codons):
+        if len(set(i)) == 1:
+            reg += "".join(set(i))
+        else:
+            reg += "[" + "".join(set(i)) + "]"
+    return reg
+
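+# Illustrative note (not a doctest, since the order inside the brackets
+# depends on Python set iteration): the leucine codons ["TTA", "TTG"]
+# collapse to the pattern 'TT[AG]' (or equivalently 'TT[GA]').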
+
+def _get_aa_regex(codon_table, stop="*", unknown="X"):
+    """Set up the regular expression of a given CodonTable (PRIVATE).
+
+    >>> from Bio.Data.CodonTable import generic_by_id
+    >>> p = generic_by_id[1]
+    >>> t = _get_aa_regex(p)
+    >>> print(t['A'][0])
+    G
+    >>> print(t['A'][1])
+    C
+    >>> print(sorted(list(t['A'][2:])))
+    ['A', 'C', 'G', 'T', 'U', '[', ']']
+    >>> print(sorted(list(t['L'][:5])))
+    ['C', 'T', 'U', '[', ']']
+    >>> print(sorted(list(t['L'][5:9])))
+    ['T', 'U', '[', ']']
+    >>> print(sorted(list(t['L'][9:])))
+    ['A', 'C', 'G', 'T', 'U', '[', ']']
+
+    """
+    from Bio.Data.CodonTable import CodonTable
+
+    if not isinstance(codon_table, CodonTable):
+        raise TypeError("Input table is not a instance of Bio.Data.CodonTable object")
+    aa2codon = {}
+    for codon, aa in codon_table.forward_table.items():
+        aa2codon.setdefault(aa, []).append(codon)
+    for aa, codons in aa2codon.items():
+        aa2codon[aa] = _codons2re(codons)
+    aa2codon[stop] = _codons2re(codon_table.stop_codons)
+    aa2codon[unknown] = "..."
+    return aa2codon
+
+
+def _check_corr(
+    pro, nucl, gap_char, codon_table, complete_protein=False, anchor_len=10,
+):
+    """Check if the nucleotide can be translated into the protein (PRIVATE).
+
+    Expects two SeqRecord objects.
+    """
+    import re
+
+    if not isinstance(pro, SeqRecord) or not isinstance(nucl, SeqRecord):
+        raise TypeError(
+            "_check_corr accepts two SeqRecord object. Please check your input."
+        )
+
+    aa2re = _get_aa_regex(codon_table)
+    pro_re = ""
+    for aa in pro.seq:
+        if aa != gap_char:
+            pro_re += aa2re[aa]
+
+    nucl_seq = str(nucl.seq.upper().replace(gap_char, ""))
+    match = re.search(pro_re, nucl_seq)
+    if match:
+        # mode = 0, direct match
+        return (match.span(), 0)
+    else:
+        # The failure might be caused by mismatches or frameshifts;
+        # try again using anchors
+        # anchor_len = 10  # adjust this value to tune performance
+        pro_seq = str(pro.seq).replace(gap_char, "")
+        anchors = [
+            pro_seq[i : (i + anchor_len)] for i in range(0, len(pro_seq), anchor_len)
+        ]
+        # if the last anchor is less than the specified anchor
+        # size, we combine the penultimate and the last anchor
+        # together as the last one.
+        # TODO: modify this to deal with short sequence with only
+        # one anchor.
+        if len(anchors[-1]) < anchor_len:
+            anchors[-1] = anchors[-2] + anchors[-1]
+
+        pro_re = []
+        anchor_distance = 0
+        anchor_pos = []
+        for i, anchor in enumerate(anchors):
+            this_anchor_len = len(anchor)
+            qcodon = ""
+            fncodon = ""
+            # dirty code to deal with the last anchor
+            # as the last anchor is combined in the steps
+            # above, we need to get the true last anchor to
+            # pro_re
+            if this_anchor_len == anchor_len:
+                for aa in anchor:
+                    if complete_protein and i == 0:
+                        qcodon += _codons2re(codon_table.start_codons)
+                        fncodon += aa2re["X"]
+                        continue
+                    qcodon += aa2re[aa]
+                    fncodon += aa2re["X"]
+                match = re.search(qcodon, nucl_seq)
+            elif this_anchor_len > anchor_len:
+                last_qcodon = ""
+                last_fcodon = ""
+                for j in range(anchor_len, len(anchor)):
+                    last_qcodon += aa2re[anchor[j]]
+                    last_fcodon += aa2re["X"]
+                match = re.search(last_qcodon, nucl_seq)
+            # build full_pro_re from anchors
+            if match:
+                anchor_pos.append((match.start(), match.end(), i))
+                if this_anchor_len == anchor_len:
+                    pro_re.append(qcodon)
+                else:
+                    pro_re.append(last_qcodon)
+            else:
+                if this_anchor_len == anchor_len:
+                    pro_re.append(fncodon)
+                else:
+                    pro_re.append(last_fcodon)
+        full_pro_re = "".join(pro_re)
+        match = re.search(full_pro_re, nucl_seq)
+        if match:
+            # mode = 1, mismatch
+            return (match.span(), 1)
+        else:
+            # check frames of anchors
+            # ten frameshift events are allowed in a sequence
+            first_anchor = True
+            shift_id_pos = 0
+            # check the first anchor
+            if first_anchor and anchor_pos[0][2] != 0:
+                shift_val_lst = [1, 2, 3 * anchor_len - 2, 3 * anchor_len - 1, 0]
+                sh_anc = anchors[0]
+                for shift_val in shift_val_lst:
+                    if shift_val == 0:
+                        qcodon = None
+                        break
+                    if shift_val in (1, 2):
+                        sh_nuc_len = anchor_len * 3 + shift_val
+                    elif shift_val in (3 * anchor_len - 2, 3 * anchor_len - 1):
+                        sh_nuc_len = anchor_len * 3 - (3 * anchor_len - shift_val)
+                    if anchor_pos[0][0] >= sh_nuc_len:
+                        sh_nuc = nucl_seq[
+                            anchor_pos[0][0] - sh_nuc_len : anchor_pos[0][0]
+                        ]
+                    else:
+                        # this is unlikely to produce the correct output
+                        sh_nuc = nucl_seq[: anchor_pos[0][0]]
+                    qcodon, shift_id_pos = _get_shift_anchor_re(
+                        sh_anc, sh_nuc, shift_val, aa2re, anchor_len, shift_id_pos
+                    )
+                    if qcodon is not None and qcodon != -1:
+                        # pro_re[0] should be '.'*anchor_len, therefore I
+                        # replace it.
+                        pro_re[0] = qcodon
+                        break
+                if qcodon == -1:
+                    warnings.warn(
+                        f"first frameshift detection failed for {nucl.id}",
+                        BiopythonWarning,
+                    )
+            # check anchors in the middle
+            for i in range(len(anchor_pos) - 1):
+                shift_val = (anchor_pos[i + 1][0] - anchor_pos[i][0]) % (3 * anchor_len)
+                sh_anc = "".join(anchors[anchor_pos[i][2] : anchor_pos[i + 1][2]])
+                sh_nuc = nucl_seq[anchor_pos[i][0] : anchor_pos[i + 1][0]]
+                qcodon = None
+                if shift_val != 0:
+                    qcodon, shift_id_pos = _get_shift_anchor_re(
+                        sh_anc, sh_nuc, shift_val, aa2re, anchor_len, shift_id_pos
+                    )
+                if qcodon is not None and qcodon != -1:
+                    pro_re[anchor_pos[i][2] : anchor_pos[i + 1][2]] = [qcodon]
+                    qcodon = None
+                elif qcodon == -1:
+                    warnings.warn(
+                        f"middle frameshift detection failed for {nucl.id}",
+                        BiopythonWarning,
+                    )
+            # check the last anchor
+            if anchor_pos[-1][2] + 1 == len(anchors) - 1:
+                sh_anc = anchors[-1]
+                this_anchor_len = len(sh_anc)
+                shift_val_lst = [
+                    1,
+                    2,
+                    3 * this_anchor_len - 2,
+                    3 * this_anchor_len - 1,
+                    0,
+                ]
+                for shift_val in shift_val_lst:
+                    if shift_val == 0:
+                        qcodon = None
+                        break
+                    if shift_val in (1, 2):
+                        sh_nuc_len = this_anchor_len * 3 + shift_val
+                    elif shift_val in (
+                        3 * this_anchor_len - 2,
+                        3 * this_anchor_len - 1,
+                    ):
+                        sh_nuc_len = this_anchor_len * 3 - (
+                            3 * this_anchor_len - shift_val
+                        )
+                    if len(nucl_seq) - anchor_pos[-1][0] >= sh_nuc_len:
+                        sh_nuc = nucl_seq[
+                            anchor_pos[-1][0] : anchor_pos[-1][0] + sh_nuc_len
+                        ]
+                    else:
+                        # this is unlikely to produce the correct output
+                        sh_nuc = nucl_seq[anchor_pos[-1][0] :]
+                    qcodon, shift_id_pos = _get_shift_anchor_re(
+                        sh_anc, sh_nuc, shift_val, aa2re, this_anchor_len, shift_id_pos
+                    )
+                    if qcodon is not None and qcodon != -1:
+                        pro_re.pop()
+                        pro_re[-1] = qcodon
+                        break
+                if qcodon == -1:
+                    warnings.warn(
+                        f"last frameshift detection failed for {nucl.id}",
+                        BiopythonWarning,
+                    )
+            # try global match
+            full_pro_re = "".join(pro_re)
+            match = re.search(full_pro_re, nucl_seq)
+            if match:
+                return (match.span(), 2, match)
+            else:
+                raise RuntimeError(
+                    f"Protein SeqRecord ({pro.id}) and "
+                    f"Nucleotide SeqRecord ({nucl.id}) do not match!"
+                )
+
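+# _check_corr returns (span, 0) for a direct regex match, (span, 1) when
+# the anchor-based match succeeds despite mismatches, and (span, 2, match)
+# when frameshifts had to be modelled; _get_codon_rec dispatches on this
+# mode flag.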
+
+def _get_shift_anchor_re(sh_anc, sh_nuc, shift_val, aa2re, anchor_len, shift_id_pos):
+    """Find a regular expression matching a potentially shifted anchor (PRIVATE).
+
+    Arguments:
+     - sh_anc    - shifted anchor sequence
+     - sh_nuc    - potentially corresponding nucleotide sequence
+       of sh_anc
+     - shift_val - 1 or 2 indicates forward frame shift, whereas
+       3*anchor_len-1 or 3*anchor_len-2 indicates
+       backward shift
+     - aa2re     - aa to codon re dict
+     - anchor_len - length of the anchor
+     - shift_id_pos - specify current shift name we are at
+
+    """
+    import re
+
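+    # named-group ids 'a'..'j' (chr(97)..chr(106)); this caps the number of
+    # tracked frameshift events at ten per sequence, as noted in _check_corr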
+    shift_id = [chr(i) for i in range(97, 107)]
+    if 0 < shift_val < 3 * anchor_len - 2:
+        # if shift_val in (1, 2):
+        for j in range(len(sh_anc)):
+            qcodon = "^"
+            for k, aa in enumerate(sh_anc):
+                if k == j:
+                    qcodon += aa2re[aa] + "(?P<" + shift_id[shift_id_pos] + ">..*)"
+                else:
+                    qcodon += aa2re[aa]
+            qcodon += "$"
+            match = re.search(qcodon, sh_nuc)
+            if match:
+                qcodon = qcodon.replace("^", "").replace("$", "")
+                shift_id_pos += 1
+                return qcodon, shift_id_pos
+        if not match:
+            # failed to find a match (frameshift)
+            return -1, shift_id_pos
+    elif shift_val in (3 * anchor_len - 1, 3 * anchor_len - 2):
+        shift_val = 3 * anchor_len - shift_val
+        # obtain shifted anchor and corresponding nucl
+        # first check if the shifted pos is just at the end of the
+        # previous anchor.
+        for j in range(1, len(sh_anc)):
+            qcodon = "^"
+            for k, aa in enumerate(sh_anc):
+                if k == j - 1:
+                    # will be considered in the next step
+                    pass
+                elif k == j:
+                    qcodon += _merge_aa2re(
+                        sh_anc[j - 1],
+                        sh_anc[j],
+                        shift_val,
+                        aa2re,
+                        shift_id[shift_id_pos].upper(),
+                    )
+                else:
+                    qcodon += aa2re[aa]
+            qcodon += "$"
+            match = re.search(qcodon, sh_nuc)
+            if match:
+                qcodon = qcodon.replace("^", "").replace("$", "")
+                shift_id_pos += 1
+                return qcodon, shift_id_pos
+        if not match:
+            # failed to find a match (frameshift)
+            return -1, shift_id_pos
+
+
+def _merge_aa2re(aa1, aa2, shift_val, aa2re, reid):
+    """Merge two amino acids based on detected frame shift value (PRIVATE)."""
+
+    def get_aa_from_codonre(re_aa):
+        aas = []
+        m = 0
+        for i in re_aa:
+            if i == "[":
+                m = -1
+                aas.append("")
+            elif i == "]":
+                m = 0
+                continue
+            elif m == -1:
+                aas[-1] = aas[-1] + i
+            elif m == 0:
+                aas.append(i)
+        return aas
+
+    scodon = list(map(get_aa_from_codonre, (aa2re[aa1], aa2re[aa2])))
+    if shift_val == 1:
+        intersect = "".join(set(scodon[0][2]) & set(scodon[1][0]))
+        scodonre = "(?P<" + reid + ">"
+        scodonre += (
+            "["
+            + scodon[0][0]
+            + "]"
+            + "["
+            + scodon[0][1]
+            + "]"
+            + "["
+            + intersect
+            + "]"
+            + "["
+            + scodon[1][1]
+            + "]"
+            + "["
+            + scodon[1][2]
+            + "]"
+        )
+    elif shift_val == 2:
+        intersect1 = "".join(set(scodon[0][1]) & set(scodon[1][0]))
+        intersect2 = "".join(set(scodon[0][2]) & set(scodon[1][1]))
+        scodonre = "(?P<" + reid + ">"
+        scodonre += (
+            "["
+            + scodon[0][0]
+            + "]"
+            + "["
+            + intersect1
+            + "]"
+            + "["
+            + intersect2
+            + "]"
+            + "["
+            + scodon[1][2]
+            + "]"
+        )
+    scodonre += ")"
+    return scodonre
+
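+# The merged pattern spans 3*2 - shift_val nucleotides: five character
+# classes for shift_val == 1 and four for shift_val == 2, wrapped in a
+# named group so that _get_codon_rec can later recover the shifted
+# positions from match.groupdict().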
+
+def _get_codon_rec(
+    pro, nucl, span_mode, gap_char, codon_table, complete_protein=False, max_score=10,
+):
+    """Generate codon alignment based on regular re match (PRIVATE).
+
+    span_mode is a tuple returned by _check_corr. The first element
+    is the span of a re search, and the second element is the mode
+    for the match.
+
+    mode
+     - 0: direct match
+     - 1: mismatch (no indels)
+     - 2: frameshift
+
+    """
+    import re
+    from Bio.Seq import Seq
+
+    nucl_seq = nucl.seq.replace(gap_char, "")
+    span = span_mode[0]
+    mode = span_mode[1]
+    aa2re = _get_aa_regex(codon_table)
+    if mode in (0, 1):
+        if len(pro.seq.replace(gap_char, "")) * 3 != (span[1] - span[0]):
+            raise ValueError(
+                f"Protein Record {pro.id} and "
+                f"Nucleotide Record {nucl.id} do not match!"
+            )
+        aa_num = 0
+        codon_seq = CodonSeq()
+        for aa in pro.seq:
+            if aa == "-":
+                codon_seq += "---"
+            elif complete_protein and aa_num == 0:
+                this_codon = nucl_seq[span[0] : span[0] + 3]
+                if not re.search(
+                    _codons2re(codon_table.start_codons), str(this_codon.upper())
+                ):
+                    max_score -= 1
+                    warnings.warn(
+                        f"start codon of {pro.id} ({aa} {aa_num}) does not "
+                        f"correspond to {nucl.id} ({this_codon})",
+                        BiopythonWarning,
+                    )
+                if max_score == 0:
+                    raise RuntimeError(
+                        f"max_score reached for {nucl.id}! Please raise up "
+                        "the tolerance to get an alignment in anyway"
+                    )
+                codon_seq += this_codon
+                aa_num += 1
+            else:
+                this_codon = nucl_seq[span[0] + 3 * aa_num : span[0] + 3 * (aa_num + 1)]
+                if this_codon.upper().translate(table=codon_table) != aa:
+                    max_score -= 1
+                    warnings.warn(
+                        "%s(%s %d) does not correspond to %s(%s)"
+                        % (pro.id, aa, aa_num, nucl.id, this_codon),
+                        BiopythonWarning,
+                    )
+                if max_score == 0:
+                    raise RuntimeError(
+                        f"max_score reached for {nucl.id}! Please raise up "
+                        "the tolerance to get an alignment in anyway"
+                    )
+                codon_seq += this_codon
+                aa_num += 1
+        return SeqRecord(codon_seq, id=nucl.id)
+    elif mode == 2:
+        from collections import deque
+
+        shift_pos = deque([])
+        shift_start = []
+        match = span_mode[2]
+        m_groupdict = list(match.groupdict().keys())
+        # backward frameshift
+        for i in m_groupdict:
+            shift_pos.append(match.span(i))
+            shift_start.append(match.start(i))
+        rf_table = []
+        i = match.start()
+        while True:
+            rf_table.append(i)
+            i += 3
+            if i in shift_start and m_groupdict[shift_start.index(i)].isupper():
+                shift_index = shift_start.index(i)
+                shift_val = 6 - (shift_pos[shift_index][1] - shift_pos[shift_index][0])
+                rf_table.append(i)
+                rf_table.append(i + 3 - shift_val)
+                i = shift_pos[shift_index][1]
+            elif i in shift_start and m_groupdict[shift_start.index(i)].islower():
+                i = shift_pos[shift_start.index(i)][1]
+            if i >= match.end():
+                break
+        codon_seq = CodonSeq()
+        aa_num = 0
+        for aa in pro.seq:
+            if aa == "-":
+                codon_seq += "---"
+            elif complete_protein and aa_num == 0:
+                this_codon = nucl_seq[rf_table[0] : rf_table[0] + 3]
+                if not re.search(
+                    _codons2re(codon_table.start_codons), str(this_codon.upper())
+                ):
+                    max_score -= 1
+                    warnings.warn(
+                        f"start codon of {pro.id}({aa} {aa_num}) does not "
+                        f"correspond to {nucl.id}({this_codon})",
+                        BiopythonWarning,
+                    )
+                codon_seq += this_codon
+                aa_num += 1
+            else:
+                if (
+                    aa_num < len(pro.seq.replace("-", "")) - 1
+                    and rf_table[aa_num + 1] - rf_table[aa_num] - 3 < 0
+                ):
+                    max_score -= 1
+                    start = rf_table[aa_num]
+                    end = start + (3 - shift_val)
+                    ngap = shift_val
+                    this_codon = nucl_seq[start:end] + "-" * ngap
+                elif rf_table[aa_num] - rf_table[aa_num - 1] - 3 > 0:
+                    max_score -= 1
+                    start = rf_table[aa_num - 1] + 3
+                    end = rf_table[aa_num]
+                    ngap = 3 - (rf_table[aa_num] - rf_table[aa_num - 1] - 3)
+                    this_codon = (
+                        nucl_seq[start:end]
+                        + "-" * ngap
+                        + nucl_seq[rf_table[aa_num] : rf_table[aa_num] + 3]
+                    )
+                else:
+                    start = rf_table[aa_num]
+                    end = start + 3
+                    this_codon = nucl_seq[start:end]
+                    if this_codon.upper().translate(table=codon_table) != aa:
+                        max_score -= 1
+                        warnings.warn(
+                            f"Codon of {pro.id}({aa} {aa_num}) does not "
+                            f"correspond to {nucl.id}({this_codon})",
+                            BiopythonWarning,
+                        )
+                if max_score == 0:
+                    raise RuntimeError(
+                        f"max_score reached for {nucl.id}! Please raise up "
+                        "the tolerance to get an alignment in anyway"
+                    )
+                codon_seq += this_codon
+                aa_num += 1
+        codon_seq.rf_table = rf_table
+        return SeqRecord(codon_seq, id=nucl.id)
+
+
+def _align_shift_recs(recs):
+    """Build alignment according to the frameshift detected by _check_corr (PRIVATE).
+
+    Argument:
+     - recs - a list of SeqRecords containing a CodonSeq dictated
+       by a rf_table (with frameshift in some of them).
+
+    """
+
+    def find_next_int(k, lst):
+        idx = lst.index(k)
+        p = 0
+        while True:
+            if isinstance(lst[idx + p], int):
+                return lst[idx + p], p
+            p += 1
+
+    full_rf_table_lst = [rec.seq.get_full_rf_table() for rec in recs]
+    rf_num = [0] * len(recs)
+    for k, rec in enumerate(recs):
+        for i in rec.seq.get_full_rf_table():
+            if isinstance(i, int):
+                rf_num[k] += 1
+            # isinstance(i, float) should be True
+            elif rec.seq[int(i) : int(i) + 3] == "---":
+                rf_num[k] += 1
+    if len(set(rf_num)) != 1:
+        raise RuntimeError("Number of alignable codons unequal in given records")
+    i = 0
+    rec_num = len(recs)
+    while True:
+        add_lst = []
+        try:
+            col_rf_lst = [k[i] for k in full_rf_table_lst]
+        except IndexError:
+            # we probably reached the last codon
+            break
+        for j, k in enumerate(col_rf_lst):
+            add_lst.append((j, int(k)))
+            if isinstance(k, float) and recs[j].seq[int(k) : int(k) + 3] != "---":
+                m, p = find_next_int(k, full_rf_table_lst[j])
+                if (m - k) % 3 != 0:
+                    gap_num = 3 - (m - k) % 3
+                else:
+                    gap_num = 0
+                if gap_num != 0:
+                    gaps = "-" * int(gap_num)
+                    seq = CodonSeq(rf_table=recs[j].seq.rf_table)
+                    seq += recs[j].seq[: int(k)] + gaps + recs[j].seq[int(k) :]
+                    full_rf_table = full_rf_table_lst[j]
+                    bp = full_rf_table.index(k)
+                    full_rf_table = full_rf_table[:bp] + [
+                        v + int(gap_num) for v in full_rf_table[bp + 1 :]
+                    ]
+                    full_rf_table_lst[j] = full_rf_table
+                    recs[j].seq = seq
+                add_lst.pop()
+                gap_num += m - k
+                i += p - 1
+        if len(add_lst) != rec_num:
+            for j, k in add_lst:
+                seq = CodonSeq(rf_table=recs[j].seq.rf_table)
+                gaps = "-" * int(gap_num)
+                seq += recs[j].seq[: int(k)] + gaps + recs[j].seq[int(k) :]
+                full_rf_table = full_rf_table_lst[j]
+                bp = full_rf_table.index(k)
+                inter_rf = []
+                for t in range(0, len(gaps), 3):
+                    inter_rf.append(k + t + 3.0)
+                full_rf_table = (
+                    full_rf_table[:bp]
+                    + inter_rf
+                    + [v + int(gap_num) for v in full_rf_table[bp:]]
+                )
+                full_rf_table_lst[j] = full_rf_table
+                recs[j].seq = seq
+        i += 1
+    return recs
+
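+# A minimal usage sketch for build() (an illustrative example; the file
+# names and formats here are assumptions, not part of this module):
+#
+#     from Bio import AlignIO, SeqIO
+#     from Bio import codonalign
+#
+#     pro_align = AlignIO.read("protein.aln", "clustal")
+#     nucl_seqs = SeqIO.to_dict(SeqIO.parse("nucl.fasta", "fasta"))
+#     codon_aln = codonalign.build(pro_align, nucl_seqs)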
+
+if __name__ == "__main__":
+    from Bio._utils import run_doctest
+
+    run_doctest()
diff --git a/code/lib/Bio/codonalign/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/codonalign/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000..5dcc070
Binary files /dev/null and b/code/lib/Bio/codonalign/__pycache__/__init__.cpython-37.pyc differ
diff --git a/code/lib/Bio/codonalign/__pycache__/chisq.cpython-37.pyc b/code/lib/Bio/codonalign/__pycache__/chisq.cpython-37.pyc
new file mode 100644
index 0000000..b4a3a28
Binary files /dev/null and b/code/lib/Bio/codonalign/__pycache__/chisq.cpython-37.pyc differ
diff --git a/code/lib/Bio/codonalign/__pycache__/codonalignment.cpython-37.pyc b/code/lib/Bio/codonalign/__pycache__/codonalignment.cpython-37.pyc
new file mode 100644
index 0000000..d424822
Binary files /dev/null and b/code/lib/Bio/codonalign/__pycache__/codonalignment.cpython-37.pyc differ
diff --git a/code/lib/Bio/codonalign/__pycache__/codonseq.cpython-37.pyc b/code/lib/Bio/codonalign/__pycache__/codonseq.cpython-37.pyc
new file mode 100644
index 0000000..a7f1b5d
Binary files /dev/null and b/code/lib/Bio/codonalign/__pycache__/codonseq.cpython-37.pyc differ
diff --git a/code/lib/Bio/codonalign/chisq.py b/code/lib/Bio/codonalign/chisq.py
new file mode 100644
index 0000000..6b1ff5f
--- /dev/null
+++ b/code/lib/Bio/codonalign/chisq.py
@@ -0,0 +1,148 @@
+"""Python implementation of chisqprob, to avoid SciPy dependency.
+
+Adapted from SciPy: scipy/special/cephes/{chdtr,igam}.
+"""
+
+import math
+
+# Cephes Math Library Release 2.0:  April, 1987
+# Copyright 1985, 1987 by Stephen L. Moshier
+# Direct inquiries to 30 Frost Street, Cambridge, MA 02140
+MACHEP = 0.0000001  # the machine roundoff error / tolerance
+BIG = 4.503599627370496e15
+BIGINV = 2.22044604925031308085e-16
+
+
+def chisqprob(x, df):
+    """Probability value (1-tail) for the Chi^2 probability distribution.
+
+    Unlike the SciPy original, this pure-Python port operates on scalars.
+
+    Parameters
+    ----------
+    x : float > 0
+
+    df : int or float, degrees of freedom, >= 1
+
+    Returns
+    -------
+    chisqprob : float
+        The area from ``x`` to infinity under the Chi^2 probability
+        distribution with degrees of freedom ``df``.
+
+    """
+    if x <= 0:
+        return 1.0
+    if x == 0:
+        return 0.0
+    if df <= 0:
+        raise ValueError("Domain error.")
+    if x < 1.0 or x < df:
+        return 1.0 - _igam(0.5 * df, 0.5 * x)
+    return _igamc(0.5 * df, 0.5 * x)
+
+
+def _igamc(a, x):
+    """Complemented incomplete Gamma integral (PRIVATE).
+
+    Parameters
+    ----------
+    a: float
+    x: float
+
+    Returns
+    -------
+    float
+
+    Notes
+    -----
+    The function is defined by::
+
+        igamc(a,x)   =   1 - igam(a,x)
+
+                                inf.
+                                   -
+                          1       | |  -t  a-1
+                    =   -----     |   e   t   dt.
+                         -      | |
+                        | (a)    -
+                                    x
+
+    In this implementation both arguments must be positive.
+    The integral is evaluated by either a power series or
+    continued fraction expansion, depending on the relative
+    values of a and x.
+
+    """
+    # Compute  x**a * exp(-x) / Gamma(a)
+    ax = math.exp(a * math.log(x) - x - math.lgamma(a))
+
+    # Continued fraction
+    y = 1.0 - a
+    z = x + y + 1.0
+    c = 0.0
+    pkm2 = 1.0
+    qkm2 = x
+    pkm1 = x + 1.0
+    qkm1 = z * x
+    ans = pkm1 / qkm1
+    while True:
+        c += 1.0
+        y += 1.0
+        z += 2.0
+        yc = y * c
+        pk = pkm1 * z - pkm2 * yc
+        qk = qkm1 * z - qkm2 * yc
+        if qk != 0:
+            r = pk / qk
+            t = abs((ans - r) / r)
+            ans = r
+        else:
+            t = 1.0
+        pkm2 = pkm1
+        pkm1 = pk
+        qkm2 = qkm1
+        qkm1 = qk
+        if abs(pk) > BIG:
+            pkm2 *= BIGINV
+            pkm1 *= BIGINV
+            qkm2 *= BIGINV
+            qkm1 *= BIGINV
+        if t <= MACHEP:
+            return ans * ax
+
+
+def _igam(a, x):
+    """Left tail of incomplete Gamma function (PRIVATE).
+
+    Computes this formula::
+
+                 inf.      k
+          a  -x   -       x
+         x  e     >   ----------
+                  -     -
+                k=0   | (a+k+1)
+
+    """
+    # Compute  x**a * exp(-x) / Gamma(a)
+    ax = math.exp(a * math.log(x) - x - math.lgamma(a))
+
+    # Power series
+    r = a
+    c = 1.0
+    ans = 1.0
+
+    while True:
+        r += 1.0
+        c *= x / r
+        ans += c
+        if c / ans <= MACHEP:
+            return ans * ax / a
+
+
+# --- Speed ---
+
+# try:
+#    from scipy.stats import chisqprob
+# except ImportError:
+#    pass
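+
+# A quick sanity check (illustrative): the 5% critical value of the
+# Chi^2 distribution with 1 degree of freedom is about 3.841, so
+# chisqprob(3.841, 1) should be approximately 0.05.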
diff --git a/code/lib/Bio/codonalign/codonalignment.py b/code/lib/Bio/codonalign/codonalignment.py
new file mode 100644
index 0000000..f5570b6
--- /dev/null
+++ b/code/lib/Bio/codonalign/codonalignment.py
@@ -0,0 +1,513 @@
+# Copyright 2013 by Zheng Ruan (zruan1991@gmail.com).
+# All rights reserved.
+# This code is part of the Biopython distribution and governed by its
+# license.  Please see the LICENSE file that should have been included
+# as part of this package.
+"""Code for dealing with Codon Alignment.
+
+The CodonAlignment class inherits from the MultipleSeqAlignment class and is
+the core class for dealing with codon alignments in Biopython.
+"""
+
+import warnings
+
+from Bio.Align import MultipleSeqAlignment
+from Bio.SeqRecord import SeqRecord
+from Bio.Data import CodonTable
+from Bio import BiopythonWarning
+
+
+from Bio.codonalign.codonseq import _get_codon_list, CodonSeq, cal_dn_ds
+from Bio.codonalign.chisq import chisqprob
+
+
+class CodonAlignment(MultipleSeqAlignment):
+    """Codon Alignment class that inherits from MultipleSeqAlignment.
+
+    >>> from Bio.SeqRecord import SeqRecord
+    >>> a = SeqRecord(CodonSeq("AAAACGTCG"), id="Alpha")
+    >>> b = SeqRecord(CodonSeq("AAA---TCG"), id="Beta")
+    >>> c = SeqRecord(CodonSeq("AAAAGGTGG"), id="Gamma")
+    >>> print(CodonAlignment([a, b, c]))
+    CodonAlignment with 3 rows and 9 columns (3 codons)
+    AAAACGTCG Alpha
+    AAA---TCG Beta
+    AAAAGGTGG Gamma
+
+    """
+
+    def __init__(self, records="", name=None):
+        """Initialize the class."""
+        MultipleSeqAlignment.__init__(self, records)
+
+        # check the type of the alignment to be nucleotide
+        for rec in self:
+            if not isinstance(rec.seq, CodonSeq):
+                raise TypeError(
+                    "CodonSeq objects are expected in each SeqRecord in CodonAlignment"
+                )
+
+        if self.get_alignment_length() % 3 != 0:
+            raise ValueError(
+                "Alignment length is not a multiple of "
+                "three (i.e. a whole number of codons)"
+            )
+
+    def __str__(self):
+        """Return a multi-line string summary of the alignment.
+
+        This output is intended to be readable; large alignments are
+        shown truncated. Alignments with more than 60 rows are shown as
+        the first 18 rows, an ellipsis, and the final row; each row is
+        truncated to 60 columns (20 codons), with the record identifiers.
+        This should fit nicely on a single screen.
+
+        """
+        rows = len(self._records)
+        lines = [
+            "CodonAlignment with %i rows and %i columns (%i codons)"
+            % (rows, self.get_alignment_length(), self.get_aln_length(),)
+        ]
+
+        if rows <= 60:
+            lines.extend([self._str_line(rec, length=60) for rec in self._records])
+        else:
+            lines.extend([self._str_line(rec, length=60) for rec in self._records[:18]])
+            lines.append("...")
+            lines.append(self._str_line(self._records[-1], length=60))
+        return "\n".join(lines)
+
+    def __getitem__(self, index):
+        """Return a CodonAlignment object for single indexing."""
+        if isinstance(index, int):
+            return self._records[index]
+        elif isinstance(index, slice):
+            return CodonAlignment(self._records[index])
+        elif len(index) != 2:
+            raise TypeError("Invalid index type.")
+        # Handle double indexing
+        row_index, col_index = index
+        if isinstance(row_index, int):
+            return self._records[row_index][col_index]
+        elif isinstance(col_index, int):
+            return "".join(str(rec[col_index]) for rec in self._records[row_index])
+        else:
+            return MultipleSeqAlignment(
+                rec[col_index] for rec in self._records[row_index]
+            )
+
+    def __add__(self, other):
+        """Combine two codonalignments with the same number of rows by adding them.
+
+        The method also allows combining a CodonAlignment object with a
+        MultipleSeqAlignment object. The following rules apply:
+
+            * CodonAlignment + CodonAlignment -> CodonAlignment
+            * CodonAlignment + MultipleSeqAlignment -> MultipleSeqAlignment
+        """
+        if isinstance(other, CodonAlignment):
+            if len(self) != len(other):
+                raise ValueError(
+                    "When adding two alignments they must have the same length"
+                    " (i.e. same number or rows)"
+                )
+            warnings.warn(
+                "Please make sure the two CodonAlignment objects are sharing the same codon table. This is not checked by Biopython.",
+                BiopythonWarning,
+            )
+            merged = (
+                SeqRecord(seq=CodonSeq(left.seq + right.seq))
+                for left, right in zip(self, other)
+            )
+            return CodonAlignment(merged)
+        elif isinstance(other, MultipleSeqAlignment):
+            if len(self) != len(other):
+                raise ValueError(
+                    "When adding two alignments they must have the same length"
+                    " (i.e. same number or rows)"
+                )
+            return self.toMultipleSeqAlignment() + other
+        else:
+            raise TypeError(
+                "Only CodonAlignment or MultipleSeqAlignment object can be"
+                f" added with a CodonAlignment object. {object(other)} detected."
+            )
+
+    def get_aln_length(self):
+        """Get alignment length."""
+        return self.get_alignment_length() // 3
+
+    def toMultipleSeqAlignment(self):
+        """Convert the CodonAlignment to a MultipleSeqAlignment.
+
+        Return a MultipleSeqAlignment containing all the
+        SeqRecord in the CodonAlignment using Seq to store
+        sequences
+        """
+        alignments = [SeqRecord(rec.seq.toSeq(), id=rec.id) for rec in self._records]
+        return MultipleSeqAlignment(alignments)
+
+    def get_dn_ds_matrix(self, method="NG86", codon_table=None):
+        """Available methods include NG86, LWL85, YN00 and ML.
+
+        Argument:
+         - method       - Available methods include NG86, LWL85, YN00 and ML.
+         - codon_table  - Codon table to use for forward translation.
+
+        """
+        from Bio.Phylo.TreeConstruction import DistanceMatrix as DM
+
+        if codon_table is None:
+            codon_table = CodonTable.generic_by_id[1]
+        names = [i.id for i in self._records]
+        size = len(self._records)
+        dn_matrix = []
+        ds_matrix = []
+        for i in range(size):
+            dn_matrix.append([])
+            ds_matrix.append([])
+            for j in range(i + 1):
+                if i != j:
+                    dn, ds = cal_dn_ds(
+                        self._records[i],
+                        self._records[j],
+                        method=method,
+                        codon_table=codon_table,
+                    )
+                    dn_matrix[i].append(dn)
+                    ds_matrix[i].append(ds)
+                else:
+                    dn_matrix[i].append(0.0)
+                    ds_matrix[i].append(0.0)
+        dn_dm = DM(names, matrix=dn_matrix)
+        ds_dm = DM(names, matrix=ds_matrix)
+        return dn_dm, ds_dm
+
+    def get_dn_ds_tree(
+        self, dn_ds_method="NG86", tree_method="UPGMA", codon_table=None
+    ):
+        """Construct dn tree and ds tree.
+
+        Argument:
+         - dn_ds_method - Available methods include NG86, LWL85, YN00 and ML.
+         - tree_method  - Available methods include UPGMA and NJ.
+
+        """
+        from Bio.Phylo.TreeConstruction import DistanceTreeConstructor
+
+        if codon_table is None:
+            codon_table = CodonTable.generic_by_id[1]
+        dn_dm, ds_dm = self.get_dn_ds_matrix(
+            method=dn_ds_method, codon_table=codon_table
+        )
+        dn_constructor = DistanceTreeConstructor()
+        ds_constructor = DistanceTreeConstructor()
+        if tree_method == "UPGMA":
+            dn_tree = dn_constructor.upgma(dn_dm)
+            ds_tree = ds_constructor.upgma(ds_dm)
+        elif tree_method == "NJ":
+            dn_tree = dn_constructor.nj(dn_dm)
+            ds_tree = ds_constructor.nj(ds_dm)
+        else:
+            raise RuntimeError(
+                f"Unknown tree method ({tree_method})."
+                " Only NJ and UPGMA are accepted."
+            )
+        return dn_tree, ds_tree
+
+    @classmethod
+    def from_msa(cls, align):
+        """Convert a MultipleSeqAlignment to CodonAlignment.
+
+        Function to convert a MultipleSeqAlignment to CodonAlignment.
+        It is the user's responsibility to ensure all the requirement
+        needed by CodonAlignment is met.
+        """
+        rec = [SeqRecord(CodonSeq(str(i.seq)), id=i.id) for i in align._records]
+        return cls(rec)
+
+
+def mktest(codon_alns, codon_table=None, alpha=0.05):
+    """McDonald-Kreitman test for neutrality.
+
+    Implement the McDonald-Kreitman test for neutrality (PMID: 1904993).
+    This method counts changes rather than sites
+    (http://mkt.uab.es/mkt/help_mkt.asp).
+
+    Arguments:
+     - codon_alns  - list of CodonAlignment to compare (each
+       CodonAlignment object corresponds to a gene sampled from a species)
+
+    Return the p-value of test result.
+    """
+    import copy
+
+    if codon_table is None:
+        codon_table = CodonTable.generic_by_id[1]
+    if not all(isinstance(i, CodonAlignment) for i in codon_alns):
+        raise TypeError("mktest accepts CodonAlignment list.")
+    codon_aln_len = [i.get_alignment_length() for i in codon_alns]
+    if len(set(codon_aln_len)) != 1:
+        raise RuntimeError(
+            "CodonAlignment object for mktest should be of equal length."
+        )
+    codon_num = codon_aln_len[0] // 3
+    # prepare codon_dict (taking stop codon as an extra amino acid)
+    codon_dict = copy.deepcopy(codon_table.forward_table)
+    for stop in codon_table.stop_codons:
+        codon_dict[stop] = "stop"
+    # prepare codon_lst
+    codon_lst = []
+    for codon_aln in codon_alns:
+        codon_lst.append([])
+        for i in codon_aln:
+            codon_lst[-1].append(_get_codon_list(i.seq))
+    codon_set = []
+    for i in range(codon_num):
+        uniq_codons = []
+        for j in codon_lst:
+            uniq_codon = {k[i] for k in j}
+            uniq_codons.append(uniq_codon)
+        codon_set.append(uniq_codons)
+    syn_fix, nonsyn_fix, syn_poly, nonsyn_poly = 0, 0, 0, 0
+    G, nonsyn_G = _get_codon2codon_matrix(codon_table=codon_table)
+    for i in codon_set:
+        all_codon = i[0].union(*i[1:])
+        if "-" in all_codon or len(all_codon) == 1:
+            continue
+        fix_or_not = all(len(k) == 1 for k in i)
+        if fix_or_not:
+            # fixed
+            nonsyn_subgraph = _get_subgraph(all_codon, nonsyn_G)
+            subgraph = _get_subgraph(all_codon, G)
+            this_non = _count_replacement(all_codon, nonsyn_subgraph)
+            this_syn = _count_replacement(all_codon, subgraph) - this_non
+            nonsyn_fix += this_non
+            syn_fix += this_syn
+        else:
+            # not fixed
+            nonsyn_subgraph = _get_subgraph(all_codon, nonsyn_G)
+            subgraph = _get_subgraph(all_codon, G)
+            this_non = _count_replacement(all_codon, nonsyn_subgraph)
+            this_syn = _count_replacement(all_codon, subgraph) - this_non
+            nonsyn_poly += this_non
+            syn_poly += this_syn
+    return _G_test([syn_fix, nonsyn_fix, syn_poly, nonsyn_poly])
+
+
+def _get_codon2codon_matrix(codon_table):
+    """Get codon codon substitution matrix (PRIVATE).
+
+    Elements in the matrix are number of synonymous and nonsynonymous
+    substitutions required for the substitution.
+    """
+    base_tuple = ("A", "T", "C", "G")
+    codons = [
+        i
+        for i in list(codon_table.forward_table.keys()) + codon_table.stop_codons
+        if "U" not in i
+    ]
+    # set up codon_dict considering stop codons; copy the forward table
+    # so the shared CodonTable object is not mutated
+    codon_dict = dict(codon_table.forward_table)
+    for stop in codon_table.stop_codons:
+        codon_dict[stop] = "stop"
+    G = {}  # graph for substitution
+    nonsyn_G = {}  # graph for nonsynonymous substitution
+    graph = {}
+    graph_nonsyn = {}
+    for i, codon in enumerate(codons):
+        graph[codon] = {}
+        graph_nonsyn[codon] = {}
+        for p, b in enumerate(codon):
+            for j in base_tuple:
+                tmp_codon = codon[0:p] + j + codon[p + 1 :]
+                if codon_dict[codon] != codon_dict[tmp_codon]:
+                    graph_nonsyn[codon][tmp_codon] = 1
+                    graph[codon][tmp_codon] = 1
+                else:
+                    if codon != tmp_codon:
+                        graph_nonsyn[codon][tmp_codon] = 0.1
+                        graph[codon][tmp_codon] = 1
+    for codon1 in codons:
+        nonsyn_G[codon1] = {}
+        G[codon1] = {}
+        for codon2 in codons:
+            if codon1 == codon2:
+                nonsyn_G[codon1][codon2] = 0
+                G[codon1][codon2] = 0
+            else:
+                nonsyn_G[codon1][codon2] = _dijkstra(graph_nonsyn, codon1, codon2)
+                G[codon1][codon2] = _dijkstra(graph, codon1, codon2)
+    return G, nonsyn_G
+
+
+def _dijkstra(graph, start, end):
+    """Dijkstra's algorithm Python implementation (PRIVATE).
+
+    Algorithm adapted from
+    http://thomas.pelletier.im/2010/02/dijkstras-algorithm-python-implementation/.
+    However, an obvious bug in::
+
+        if D[child_node] >(<) D[node] + child_value:
+
+    is fixed.
+    This function will return the distance between start and end.
+
+    Arguments:
+     - graph: Dictionary of dictionary (keys are vertices).
+     - start: Start vertex.
+     - end: End vertex.
+
+    Output:
+       The distance between the start and end vertices.
+
+    """
+    D = {}  # Final distances dict
+    P = {}  # Predecessor dict
+    # Fill the dicts with default values
+    for node in graph.keys():
+        D[node] = 100  # Vertices are unreachable
+        P[node] = ""  # Vertices have no predecessors
+    D[start] = 0  # The start vertex needs no move
+    unseen_nodes = list(graph.keys())  # All nodes are unseen
+    while len(unseen_nodes) > 0:
+        # Select the node with the lowest value in D (final distance)
+        shortest = None
+        node = ""
+        for temp_node in unseen_nodes:
+            if shortest is None:
+                shortest = D[temp_node]
+                node = temp_node
+            elif D[temp_node] < shortest:
+                shortest = D[temp_node]
+                node = temp_node
+        # Remove the selected node from unseen_nodes
+        unseen_nodes.remove(node)
+        # For each child (ie: connected vertex) of the current node
+        for child_node, child_value in graph[node].items():
+            if D[child_node] > D[node] + child_value:
+                D[child_node] = D[node] + child_value
+                # To go to child_node, you have to go through node
+                P[child_node] = node
+        if node == end:
+            break
+    # Set a clean path
+    path = []
+    # We begin from the end
+    node = end
+    distance = 0
+    # While we are not arrived at the beginning
+    while not (node == start):
+        if path.count(node) == 0:
+            path.insert(0, node)  # Insert the predecessor of the current node
+            node = P[node]  # The current node becomes its predecessor
+        else:
+            break
+    path.insert(0, start)  # Finally, insert the start vertex
+    for i in range(len(path) - 1):
+        distance += graph[path[i]][path[i + 1]]
+    return distance
+
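+# Toy example of the expected graph shape (illustrative values):
+#
+#     g = {"TTT": {"TTC": 1}, "TTC": {"TTT": 1, "TTA": 1}, "TTA": {"TTC": 1}}
+#     _dijkstra(g, "TTT", "TTA")  # -> 2
+#
+# In this module the graphs come from _get_codon2codon_matrix, with edge
+# weight 1 per single-base change (0.1 for synonymous edges in the
+# nonsynonymous graph).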
+
+def _count_replacement(codon_set, G):
+    """Count replacement needed for a given codon_set (PRIVATE)."""
+    from math import floor
+
+    if len(codon_set) == 1:
+        return 0
+    elif len(codon_set) == 2:
+        codons = list(codon_set)
+        return floor(G[codons[0]][codons[1]])
+    else:
+        return _prim(G)
+
+
+def _prim(G):
+    """Prim's algorithm to find minimum spanning tree (PRIVATE).
+
+    Code is adapted from
+    http://programmingpraxis.com/2010/04/09/minimum-spanning-tree-prims-algorithm/
+    """
+    from math import floor
+    from collections import defaultdict
+    from heapq import heapify, heappop, heappush
+
+    nodes = []
+    edges = []
+    for i in G.keys():
+        nodes.append(i)
+        for j in G[i]:
+            if (i, j, G[i][j]) not in edges and (j, i, G[i][j]) not in edges:
+                edges.append((i, j, G[i][j]))
+    conn = defaultdict(list)
+    for n1, n2, c in edges:
+        conn[n1].append((c, n1, n2))
+        conn[n2].append((c, n2, n1))
+    mst = []  # minimum spanning tree
+    used = {nodes[0]}  # a single-element set; set(nodes[0]) would split the codon string
+    usable_edges = conn[nodes[0]][:]
+    heapify(usable_edges)
+    while usable_edges:
+        cost, n1, n2 = heappop(usable_edges)
+        if n2 not in used:
+            used.add(n2)
+            mst.append((n1, n2, cost))
+            for e in conn[n2]:
+                if e[2] not in used:
+                    heappush(usable_edges, e)
+    length = 0
+    for p in mst:
+        length += floor(p[2])
+    return length
+
+
+def _get_subgraph(codons, G):
+    """Get the subgraph that contains all codons in list (PRIVATE)."""
+    subgraph = {}
+    for i in codons:
+        subgraph[i] = {}
+        for j in codons:
+            if i != j:
+                subgraph[i][j] = G[i][j]
+    return subgraph
+
+
+def _G_test(site_counts):
+    """G test for 2x2 contingency table (PRIVATE).
+
+    Arguments:
+     - site_counts - [syn_fix, nonsyn_fix, syn_poly, nonsyn_poly]
+
+    >>> print("%0.6f" % _G_test([17, 7, 42, 2]))
+    0.004924
+    """
+    # TODO:
+    #   Apply continuity correction for Chi-square test.
+    from math import log
+
+    # from scipy.stats import chi2
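+    # G = 2 * sum(obs * ln(obs / exp)) over the 2x2 fixed/polymorphic x
+    # syn/nonsyn table, compared against a Chi^2 with 1 degree of freedom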
+    G = 0
+    tot = sum(site_counts)
+    tot_syn = site_counts[0] + site_counts[2]
+    tot_non = site_counts[1] + site_counts[3]
+    tot_fix = sum(site_counts[:2])
+    tot_poly = sum(site_counts[2:])
+    exp = [
+        tot_fix * tot_syn / tot,
+        tot_fix * tot_non / tot,
+        tot_poly * tot_syn / tot,
+        tot_poly * tot_non / tot,
+    ]
+    for obs, ex in zip(site_counts, exp):
+        G += obs * log(obs / ex)
+    G *= 2
+    # return 1-chi2.cdf(G, 1) # only 1 dof for 2x2 table
+    return chisqprob(G, 1)
+
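+# A minimal end-to-end sketch (illustrative; `aln` is assumed to be an
+# in-frame nucleotide MultipleSeqAlignment, and the per-species codon
+# alignments passed to mktest are placeholders):
+#
+#     codon_aln = CodonAlignment.from_msa(aln)
+#     dn_tree, ds_tree = codon_aln.get_dn_ds_tree(dn_ds_method="NG86",
+#                                                 tree_method="UPGMA")
+#     p_value = mktest([codon_aln_sp1, codon_aln_sp2])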
+
+if __name__ == "__main__":
+    from Bio._utils import run_doctest
+
+    run_doctest()
diff --git a/code/lib/Bio/codonalign/codonseq.py b/code/lib/Bio/codonalign/codonseq.py
new file mode 100644
index 0000000..725355a
--- /dev/null
+++ b/code/lib/Bio/codonalign/codonseq.py
@@ -0,0 +1,1319 @@
+# Copyright 2013 by Zheng Ruan (zruan1991@gmail.com).
+# All rights reserved.
+# This code is part of the Biopython distribution and governed by its
+# license.  Please see the LICENSE file that should have been included
+# as part of this package.
+"""Code for dealing with coding sequence.
+
+The CodonSeq class inherits from the Seq class and is the core class for
+dealing with sequences in a CodonAlignment in Biopython.
+
+"""
+from itertools import permutations
+from math import log
+
+from Bio.Seq import Seq
+from Bio.SeqRecord import SeqRecord
+from Bio.Data import CodonTable
+
+
+class CodonSeq(Seq):
+    """CodonSeq is designed to be within the SeqRecords of a CodonAlignment class.
+
+    CodonSeq is useful as it allows the user to specify a
+    reading frame when translating a CodonSeq.
+
+    CodonSeq also accepts codon-style slicing by calling the
+    get_codon() method.
+
+    **Important:** Ungapped CodonSeq can be any length if you
+    specify the rf_table. A gapped CodonSeq should have a length
+    that is a multiple of three.
+
+    >>> codonseq = CodonSeq("AAATTTGGGCCAAATTT", rf_table=(0,3,6,8,11,14))
+    >>> print(codonseq.translate())
+    KFGAKF
+
+    test get_full_rf_table method
+
+    >>> p = CodonSeq('AAATTTCCCGG-TGGGTTTAA', rf_table=(0, 3, 6, 9, 11, 14, 17))
+    >>> full_rf_table = p.get_full_rf_table()
+    >>> print(full_rf_table)
+    [0, 3, 6, 9, 12, 15, 18]
+    >>> print(p.translate(rf_table=full_rf_table, ungap_seq=False))
+    KFPPWV*
+    >>> p = CodonSeq('AAATTTCCCGGGAA-TTTTAA', rf_table=(0, 3, 6, 9, 14, 17))
+    >>> print(p.get_full_rf_table())
+    [0, 3, 6, 9, 12.0, 15, 18]
+    >>> p = CodonSeq('AAA------------TAA', rf_table=(0, 3))
+    >>> print(p.get_full_rf_table())
+    [0, 3.0, 6.0, 9.0, 12.0, 15]
+
+    """
+
+    def __init__(self, data="", gap_char="-", rf_table=None):
+        """Initialize the class."""
+        # rf_table should be a tuple or list indicating every codon
+        # start position along the ungapped sequence. For example:
+        # sequence = 'AAATTTGGGCCAAATTT'
+        # rf_table = (0, 3, 6, 8, 11, 14)
+        # the translated protein sequences will be
+        # AAA TTT GGG GCC AAA TTT
+        #  K   F   G   A   K   F
+        # Notice: rf_table applies to ungapped sequence. If there
+        #   are gaps in the sequence, they will be discarded. This
+        #   feature ensures the rf_table is independent of where the
+        #   codon sequence appears in the alignment
+
+        Seq.__init__(self, data.upper())
+        self.gap_char = gap_char
+
+        # check the length of the alignment to be a triple
+        if rf_table is None:
+            length = len(self)
+            if length % 3 != 0:
+                raise ValueError(
+                    "Sequence length is not a multiple of "
+                    "three (i.e. a whole number of codons)"
+                )
+            self.rf_table = list(range(0, length - self.count(gap_char), 3))
+        else:
+            # if gap_char in self:
+            #    assert  len(self) % 3 == 0, \
+            #            "Gapped sequence length is not a triple number"
+            if not isinstance(rf_table, (tuple, list)):
+                raise TypeError("rf_table should be a tuple or list object")
+            if not all(isinstance(i, int) for i in rf_table):
+                raise TypeError(
+                    "Elements in rf_table should be int "
+                    "that specify the codon positions of "
+                    "the sequence"
+                )
+            self.rf_table = rf_table
+
+    def get_codon(self, index):
+        """Get the index codon from the sequence."""
+        if len({i % 3 for i in self.rf_table}) != 1:
+            raise RuntimeError(
+                "frameshift detected. CodonSeq object is not able to deal with "
+                "codon sequence with frameshift. Please use normal slice option."
+            )
+        if isinstance(index, int):
+            if index != -1:
+                return str(self[index * 3 : (index + 1) * 3])
+            else:
+                return str(self[index * 3 :])
+        else:
+            # This slice ensures that codon will always be the unit
+            # in slicing (it won't change to other codon if you are
+            # using reverse slicing such as [::-1]).
+            # The idea of the code below is to first map the slice
+            # to amino acid sequence and then transform it into
+            # codon sequence.
+            aa_index = range(len(self) // 3)
+
+            def cslice(p):
+                aa_slice = aa_index[p]
+                codon_slice = ""
+                for i in aa_slice:
+                    codon_slice += self[i * 3 : i * 3 + 3]
+                return str(codon_slice)
+
+            codon_slice = cslice(index)
+            return CodonSeq(codon_slice)
+
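+    # Example (illustrative): CodonSeq("AAATTTGGG").get_codon(0) == "AAA"
+    # and CodonSeq("AAATTTGGG").get_codon(-1) == "GGG".
+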
+    def get_codon_num(self):
+        """Return the number of codons in the CodonSeq."""
+        return len(self.rf_table)
+
+    def translate(
+        self, codon_table=None, stop_symbol="*", rf_table=None, ungap_seq=True
+    ):
+        """Translate the CodonSeq based on the reading frame in rf_table.
+
+        It is possible for the user to specify a rf_table at this
+        point. If you want gaps to be taken into account in the
+        translation (e.g. with a full rf_table), this is the only
+        way; ungap_seq should be set to False for this purpose.
+        """
+        if codon_table is None:
+            codon_table = CodonTable.generic_by_id[1]
+        amino_acids = []
+        if ungap_seq:
+            tr_seq = str(self).replace(self.gap_char, "")
+        else:
+            tr_seq = str(self)
+        if rf_table is None:
+            rf_table = self.rf_table
+        p = -1  # initiation
+        for i in rf_table:
+            if isinstance(i, float):
+                amino_acids.append("-")
+                continue
+            # elif '---' == tr_seq[i:i+3]:
+            #    amino_acids.append('-')
+            #    continue
+            elif "-" in tr_seq[i : i + 3]:
+                # considering two types of frameshift
+                if p == -1 or p - i == 3:
+                    p = i
+                    codon = tr_seq[i : i + 6].replace("-", "")[:3]
+                elif p - i > 3:
+                    codon = tr_seq[i : i + 3]
+                    p = i
+            else:
+                # normal condition without gaps
+                codon = tr_seq[i : i + 3]
+                p = i
+            if codon in codon_table.stop_codons:
+                amino_acids.append(stop_symbol)
+                continue
+            try:
+                amino_acids.append(codon_table.forward_table[codon])
+            except KeyError:
+                raise RuntimeError(
+                    f"Unknown codon detected ({codon}). Did you"
+                    " forget to specify the ungap_seq argument?"
+                )
+        return "".join(amino_acids)
+
+    def toSeq(self):
+        """Convert DNA to seq object."""
+        return Seq(str(self))
+
+    def get_full_rf_table(self):
+        """Return full rf_table of the CodonSeq records.
+
+        A full rf_table is different from a normal rf_table in that
+        it accounts for gaps in the CodonSeq. It is helpful for
+        constructing alignments that contain frameshifts.
+        """
+        relative_pos = [self.rf_table[0]]
+        for i in range(1, len(self.rf_table)):
+            relative_pos.append(self.rf_table[i] - self.rf_table[i - 1])
+        full_rf_table = []
+        codon_num = 0
+        for i in range(0, len(self), 3):
+            if self[i : i + 3] == self.gap_char * 3:
+                full_rf_table.append(i + 0.0)
+            elif relative_pos[codon_num] == 0:
+                full_rf_table.append(i)
+                codon_num += 1
+            elif relative_pos[codon_num] in (-1, -2):
+                # check the gap status of previous codon
+                gap_stat = 3 - self.count("-", i - 3, i)
+                if gap_stat == 3:
+                    full_rf_table.append(i + relative_pos[codon_num])
+                elif gap_stat == 2:
+                    full_rf_table.append(i + 1 + relative_pos[codon_num])
+                elif gap_stat == 1:
+                    full_rf_table.append(i + 2 + relative_pos[codon_num])
+                codon_num += 1
+            elif relative_pos[codon_num] > 0:
+                full_rf_table.append(i + 0.0)
+            try:
+                this_len = 3 - self.count("-", i, i + 3)
+                relative_pos[codon_num] -= this_len
+            except IndexError:
+                # we probably reached the last codon
+                pass
+        return full_rf_table
+
+    def full_translate(self, codon_table=None, stop_symbol="*"):
+        """Apply full translation with gaps considered."""
+        if codon_table is None:
+            codon_table = CodonTable.generic_by_id[1]
+        full_rf_table = self.get_full_rf_table()
+        return self.translate(
+            codon_table=codon_table,
+            stop_symbol=stop_symbol,
+            rf_table=full_rf_table,
+            ungap_seq=False,
+        )
+
+    def ungap(self, gap="-"):
+        """Return a copy of the sequence without the gap character(s)."""
+        if len(gap) != 1 or not isinstance(gap, str):
+            raise ValueError("Unexpected gap character, %s" % repr(gap))
+        return CodonSeq(str(self).replace(gap, ""), rf_table=self.rf_table)
+
+    @classmethod
+    def from_seq(cls, seq, rf_table=None):
+        """Get codon sequence from sequence data."""
+        if rf_table is None:
+            return cls(str(seq))
+        else:
+            return cls(str(seq), rf_table=rf_table)
+
+
+def _get_codon_list(codonseq):
+    """List of codons according to full_rf_table for counting (PRIVATE)."""
+    # if not isinstance(codonseq, CodonSeq):
+    #    raise TypeError("_get_codon_list accept a CodonSeq object "
+    #                    "({0} detected)".format(type(codonseq)))
+    full_rf_table = codonseq.get_full_rf_table()
+    codon_lst = []
+    for i, k in enumerate(full_rf_table):
+        if isinstance(k, int):
+            start = k
+            try:
+                end = int(full_rf_table[i + 1])
+            except IndexError:
+                end = start + 3
+            this_codon = str(codonseq[start:end])
+            if len(this_codon) == 3:
+                codon_lst.append(this_codon)
+            else:
+                # this_codon is a plain str here, so strip gap characters directly
+                codon_lst.append(this_codon.replace("-", ""))
+        elif str(codonseq[int(k) : int(k) + 3]) == "---":
+            codon_lst.append("---")
+        else:
+            # this may be problematic, as normally no codon should
+            # fall into this condition
+            codon_lst.append(codonseq[int(k) : int(k) + 3])
+    return codon_lst
+
+
+def cal_dn_ds(
+    codon_seq1, codon_seq2, method="NG86", codon_table=None, k=1, cfreq=None,
+):
+    """Calculate dN and dS of the given two sequences.
+
+    Available methods:
+        - NG86  - `Nei and Gojobori (1986)`_ (PMID 3444411).
+        - LWL85 - `Li et al. (1985)`_ (PMID 3916709).
+        - ML    - `Goldman and Yang (1994)`_ (PMID 7968486).
+        - YN00  - `Yang and Nielsen (2000)`_ (PMID 10666704).
+
+    .. _`Nei and Gojobori (1986)`: http://www.ncbi.nlm.nih.gov/pubmed/3444411
+    .. _`Li et al. (1985)`: http://www.ncbi.nlm.nih.gov/pubmed/3916709
+    .. _`Goldman and Yang (1994)`: http://mbe.oxfordjournals.org/content/11/5/725
+    .. _`Yang and Nielsen (2000)`: https://doi.org/10.1093/oxfordjournals.molbev.a026236
+
+    Arguments:
+     - codon_seq1 - CodonSeq or SeqRecord that contains a CodonSeq
+     - codon_seq2 - CodonSeq or SeqRecord that contains a CodonSeq
+     - k  - transition/transversion rate ratio
+     - cfreq - codon frequency vector; may only be specified when
+       using the ML method. Possible settings are F1x4, F3x4 and F61.
+
+    """
+    if isinstance(codon_seq1, CodonSeq) and isinstance(codon_seq2, CodonSeq):
+        pass
+    elif isinstance(codon_seq1, SeqRecord) and isinstance(codon_seq2, SeqRecord):
+        codon_seq1 = codon_seq1.seq
+        codon_seq2 = codon_seq2.seq
+    else:
+        raise TypeError(
+            "cal_dn_ds accepts two CodonSeq objects or SeqRecord "
+            "that contains CodonSeq as its seq!"
+        )
+    if len(codon_seq1.get_full_rf_table()) != len(codon_seq2.get_full_rf_table()):
+        raise RuntimeError(
+            f"full_rf_table length of seq1 ({len(codon_seq1.get_full_rf_table())})"
+            f" and seq2 ({len(codon_seq2.get_full_rf_table())}) are not the same"
+        )
+    if cfreq is None:
+        cfreq = "F3x4"
+    elif cfreq is not None and method != "ML":
+        raise RuntimeError("cfreq can only be specified when you are using ML method")
+    if cfreq not in ("F1x4", "F3x4", "F61"):
+        import warnings
+
+        warnings.warn(
+            f"Unknown cfreq ({cfreq}). "
+            "Only F1x4, F3x4 and F61 are acceptable. Used F3x4 in the following."
+        )
+        cfreq = "F3x4"
+    if codon_table is None:
+        codon_table = CodonTable.generic_by_id[1]
+    seq1_codon_lst = _get_codon_list(codon_seq1)
+    seq2_codon_lst = _get_codon_list(codon_seq2)
+    # remove gaps in seq_codon_lst
+    seq1 = []
+    seq2 = []
+    for i, j in zip(seq1_codon_lst, seq2_codon_lst):
+        if ("-" not in i) and ("-" not in j):
+            seq1.append(i)
+            seq2.append(j)
+    dnds_func = {"ML": _ml, "NG86": _ng86, "LWL85": _lwl85, "YN00": _yn00}
+    if method == "ML":
+        return dnds_func[method](seq1, seq2, cfreq, codon_table)
+    else:
+        return dnds_func[method](seq1, seq2, k, codon_table)
+
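+# A minimal usage sketch (not a doctest; the toy sequences are illustrative,
+# assuming the CodonSeq class defined above):
+#
+#     s1 = CodonSeq("ATGCTGTTAGGG")
+#     s2 = CodonSeq("ATGCTGTTGGGG")
+#     dN, dS = cal_dn_ds(s1, s2, method="NG86")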
+
+#################################################################
+#              private functions for NG86 method
+#################################################################
+
+
+def _ng86(seq1, seq2, k, codon_table):
+    """NG86 method main function (PRIVATE)."""
+    S_sites1, N_sites1 = _count_site_NG86(seq1, codon_table=codon_table, k=k)
+    S_sites2, N_sites2 = _count_site_NG86(seq2, codon_table=codon_table, k=k)
+    S_sites = (S_sites1 + S_sites2) / 2.0
+    N_sites = (N_sites1 + N_sites2) / 2.0
+    SN = [0, 0]
+    for i, j in zip(seq1, seq2):
+        SN = [
+            m + n for m, n in zip(SN, _count_diff_NG86(i, j, codon_table=codon_table))
+        ]
+
+    ps = SN[0] / S_sites
+    pn = SN[1] / N_sites
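+    # Jukes-Cantor-type correction of the raw proportions; saturated
+    # proportions (>= 3/4) are flagged as -1 below.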
+    if ps < 3 / 4:
+        dS = abs(-3.0 / 4 * log(1 - 4.0 / 3 * ps))
+    else:
+        dS = -1
+    if pn < 3 / 4:
+        dN = abs(-3.0 / 4 * log(1 - 4.0 / 3 * pn))
+    else:
+        dN = -1
+    return dN, dS
+
+
+def _count_site_NG86(codon_lst, codon_table, k=1):
+    """Count synonymous and non-synonymous sites of a list of codons (PRIVATE).
+
+    Arguments:
+     - codon_lst - A three letter codon list from a CodonSeq object.
+       This can be returned from _get_codon_list method.
+     - k - transition/transversion rate ratio.
+
+    """
+    S_site = 0  # synonymous sites
+    N_site = 0  # non-synonymous sites
+    purine = ("A", "G")
+    pyrimidine = ("T", "C")
+    base_tuple = ("A", "T", "C", "G")
+    for codon in codon_lst:
+        neighbor_codon = {"transition": [], "transversion": []}
+        # classify neighbor codons
+        codon = codon.replace("U", "T")
+        if codon == "---":
+            continue
+        for n, i in enumerate(codon):
+            for j in base_tuple:
+                if i == j:
+                    pass
+                elif i in purine and j in purine:
+                    codon_chars = list(codon)
+                    codon_chars[n] = j
+                    this_codon = "".join(codon_chars)
+                    neighbor_codon["transition"].append(this_codon)
+                elif i in pyrimidine and j in pyrimidine:
+                    codon_chars = list(codon)
+                    codon_chars[n] = j
+                    this_codon = "".join(codon_chars)
+                    neighbor_codon["transition"].append(this_codon)
+                else:
+                    codon_chars = list(codon)
+                    codon_chars[n] = j
+                    this_codon = "".join(codon_chars)
+                    neighbor_codon["transversion"].append(this_codon)
+        # count synonymous and non-synonymous sites
+        aa = codon_table.forward_table[codon]
+        this_codon_N_site = this_codon_S_site = 0
+        for neighbor in neighbor_codon["transition"]:
+            if neighbor in codon_table.stop_codons:
+                this_codon_N_site += 1
+            elif codon_table.forward_table[neighbor] == aa:
+                this_codon_S_site += 1
+            else:
+                this_codon_N_site += 1
+        for neighbor in neighbor_codon["transversion"]:
+            if neighbor in codon_table.stop_codons:
+                this_codon_N_site += k
+            elif codon_table.forward_table[neighbor] == aa:
+                this_codon_S_site += k
+            else:
+                this_codon_N_site += k
+        norm_const = (this_codon_N_site + this_codon_S_site) / 3
+        S_site += this_codon_S_site / norm_const
+        N_site += this_codon_N_site / norm_const
+    return (S_site, N_site)
+
+
+def _count_diff_NG86(codon1, codon2, codon_table):
+    """Count differences between two codons, three-letter string (PRIVATE).
+
+    The function will take multiple pathways from codon1 to codon2
+    into account.
+    """
+    if not isinstance(codon1, str) or not isinstance(codon2, str):
+        raise TypeError(
+            "_count_diff_NG86 accepts string object to represent codon"
+            f" ({type(codon1)}, {type(codon2)} detected)"
+        )
+    if len(codon1) != 3 or len(codon2) != 3:
+        raise RuntimeError(
+            "codon should be three letter string"
+            f" ({len(codon1)}, {len(codon2)} detected)"
+        )
+    SN = [0, 0]  # synonymous and nonsynonymous counts
+    if codon1 == "---" or codon2 == "---":
+        return SN
+    base_tuple = ("A", "C", "G", "T")
+    if not all(i in base_tuple for i in codon1):
+        raise RuntimeError(
+            f"Unrecognized character detected in codon1 {codon1}"
+            " (Codons consist of A, T, C or G)"
+        )
+    if not all(i in base_tuple for i in codon2):
+        raise RuntimeError(
+            f"Unrecognized character detected in codon2 {codon2}"
+            " (Codons consist of A, T, C or G)"
+        )
+    if codon1 == codon2:
+        return SN
+    else:
+        diff_pos = []
+        for i, k in enumerate(zip(codon1, codon2)):
+            if k[0] != k[1]:
+                diff_pos.append(i)
+
+        def compare_codon(codon1, codon2, codon_table, weight=1):
+            """Compare two codon accounting for different pathways."""
+            sd = nd = 0
+            if len(set(map(codon_table.forward_table.get, [codon1, codon2]))) == 1:
+                sd += weight
+            else:
+                nd += weight
+            return (sd, nd)
+
+        if len(diff_pos) == 1:
+            SN = [
+                i + j
+                for i, j in zip(
+                    SN, compare_codon(codon1, codon2, codon_table=codon_table)
+                )
+            ]
+        elif len(diff_pos) == 2:
+            for i in diff_pos:
+                temp_codon = codon1[:i] + codon2[i] + codon1[i + 1 :]
+                SN = [
+                    i + j
+                    for i, j in zip(
+                        SN,
+                        compare_codon(
+                            codon1, temp_codon, codon_table=codon_table, weight=0.5
+                        ),
+                    )
+                ]
+                SN = [
+                    i + j
+                    for i, j in zip(
+                        SN,
+                        compare_codon(
+                            temp_codon, codon2, codon_table=codon_table, weight=0.5
+                        ),
+                    )
+                ]
+        elif len(diff_pos) == 3:
+            paths = list(permutations([0, 1, 2], 3))
+            tmp_codon = []
+            for p in paths:
+                tmp1 = codon1[: p[0]] + codon2[p[0]] + codon1[p[0] + 1 :]
+                tmp2 = tmp1[: p[1]] + codon2[p[1]] + tmp1[p[1] + 1 :]
+                tmp_codon.append((tmp1, tmp2))
+                SN = [
+                    i + j
+                    for i, j in zip(
+                        SN, compare_codon(codon1, tmp1, codon_table, weight=0.5 / 3)
+                    )
+                ]
+                SN = [
+                    i + j
+                    for i, j in zip(
+                        SN, compare_codon(tmp1, tmp2, codon_table, weight=0.5 / 3)
+                    )
+                ]
+                SN = [
+                    i + j
+                    for i, j in zip(
+                        SN, compare_codon(tmp2, codon2, codon_table, weight=0.5 / 3)
+                    )
+                ]
+    return SN
+
+
+#################################################################
+#               private functions for LWL85 method
+#################################################################
+
+
+def _lwl85(seq1, seq2, k, codon_table):
+    """LWL85 method main function (PRIVATE).
+
+    Nomenclature is according to Li et al. (1985), PMID 3916709.
+    """
+    codon_fold_dict = _get_codon_fold(codon_table)
+    # count number of sites in different degenerate classes
+    fold0 = [0, 0]
+    fold2 = [0, 0]
+    fold4 = [0, 0]
+    for codon in seq1 + seq2:
+        fold_num = codon_fold_dict[codon]
+        for f in fold_num:
+            if f == "0":
+                fold0[0] += 1
+            elif f == "2":
+                fold2[0] += 1
+            elif f == "4":
+                fold4[0] += 1
+    L = [sum(fold0) / 2.0, sum(fold2) / 2.0, sum(fold4) / 2.0]
+    # count number of differences in different degenerate classes
+    PQ = [0] * 6  # with P0, P2, P4, Q0, Q2, Q4 in each position
+    for codon1, codon2 in zip(seq1, seq2):
+        if (codon1 == "---" or codon2 == "---") or codon1 == codon2:
+            continue
+        else:
+            PQ = [
+                i + j
+                for i, j in zip(
+                    PQ, _diff_codon(codon1, codon2, fold_dict=codon_fold_dict)
+                )
+            ]
+    PQ = [i / j for i, j in zip(PQ, L * 2)]
+    P = PQ[:3]
+    Q = PQ[3:]
+    A = [
+        (1.0 / 2) * log(1.0 / (1 - 2 * i - j)) - (1.0 / 4) * log(1.0 / (1 - 2 * j))
+        for i, j in zip(P, Q)
+    ]
+    B = [(1.0 / 2) * log(1.0 / (1 - 2 * i)) for i in Q]
+    # Li et al. (1985), with L = [L0, L2, L4], A = [A0, A2, A4], B = [B0, B2, B4]:
+    #   dS = 3*(L2*A2 + L4*(A4 + B4)) / (L2 + 3*L4)
+    #   dN = 3*(L2*B2 + L0*(A0 + B0)) / (2*L2 + 3*L0)
+    dS = 3 * (L[1] * A[1] + L[2] * (A[2] + B[2])) / (L[1] + 3 * L[2])
+    dN = 3 * (L[1] * B[1] + L[0] * (A[0] + B[0])) / (2 * L[1] + 3 * L[0])
+    return dN, dS
+
+
+def _get_codon_fold(codon_table):
+    """Classify different position in a codon into different folds (PRIVATE)."""
+
+    def find_fold_class(codon, forward_table):
+        base = {"A", "T", "C", "G"}
+        fold = ""
+        codon_base_lst = list(codon)
+        for i, b in enumerate(codon_base_lst):
+            other_base = base - set(b)
+            aa = []
+            for j in other_base:
+                codon_base_lst[i] = j
+                try:
+                    aa.append(forward_table["".join(codon_base_lst)])
+                except KeyError:
+                    aa.append("stop")
+            if aa.count(forward_table[codon]) == 0:
+                fold += "0"
+            elif aa.count(forward_table[codon]) in (1, 2):
+                fold += "2"
+            elif aa.count(forward_table[codon]) == 3:
+                fold += "4"
+            else:
+                raise RuntimeError(
+                    "Unknown Error, cannot assign the position to a fold"
+                )
+            codon_base_lst[i] = b
+        return fold
+
+    fold_table = {}
+    for codon in codon_table.forward_table:
+        if "U" not in codon:
+            fold_table[codon] = find_fold_class(codon, codon_table.forward_table)
+    fold_table["---"] = "---"
+    return fold_table
+
+
+def _diff_codon(codon1, codon2, fold_dict):
+    """Count number of different substitution types between two codons (PRIVATE).
+
+    returns tuple (P0, P2, P4, Q0, Q2, Q4)
+
+    Nomenclature is according to Li et al. (1985), PMID 3916709.
+    """
+    P0 = P2 = P4 = Q0 = Q2 = Q4 = 0
+    fold_num = fold_dict[codon1]
+    purine = ("A", "G")
+    pyrimidine = ("T", "C")
+    for n, (i, j) in enumerate(zip(codon1, codon2)):
+        if i != j and (i in purine and j in purine):
+            if fold_num[n] == "0":
+                P0 += 1
+            elif fold_num[n] == "2":
+                P2 += 1
+            elif fold_num[n] == "4":
+                P4 += 1
+            else:
+                raise RuntimeError("Unexpected fold_num %d" % fold_num[n])
+        if i != j and (i in pyrimidine and j in pyrimidine):
+            if fold_num[n] == "0":
+                P0 += 1
+            elif fold_num[n] == "2":
+                P2 += 1
+            elif fold_num[n] == "4":
+                P4 += 1
+            else:
+                raise RuntimeError("Unexpected fold_num %d" % fold_num[n])
+        if i != j and (
+            (i in purine and j in pyrimidine) or (i in pyrimidine and j in purine)
+        ):
+            if fold_num[n] == "0":
+                Q0 += 1
+            elif fold_num[n] == "2":
+                Q2 += 1
+            elif fold_num[n] == "4":
+                Q4 += 1
+            else:
+                raise RuntimeError("Unexpected fold_num %d" % fold_num[n])
+    return (P0, P2, P4, Q0, Q2, Q4)
+
+
+#################################################################
+#               private functions for YN00 method
+#################################################################
+
+
+def _yn00(seq1, seq2, k, codon_table):
+    """YN00 method main function (PRIVATE).
+
+    Nomenclature is according to Yang and Nielsen (2000), PMID 10666704.
+    """
+    from collections import defaultdict
+    from scipy.linalg import expm
+
+    fcodon = [
+        {"A": 0, "G": 0, "C": 0, "T": 0},
+        {"A": 0, "G": 0, "C": 0, "T": 0},
+        {"A": 0, "G": 0, "C": 0, "T": 0},
+    ]
+    codon_fold_dict = _get_codon_fold(codon_table)
+    fold0_cnt = defaultdict(int)
+    fold4_cnt = defaultdict(int)
+    for codon in seq1 + seq2:
+        # count sites at different codon position
+        if codon != "---":
+            fcodon[0][codon[0]] += 1
+            fcodon[1][codon[1]] += 1
+            fcodon[2][codon[2]] += 1
+        # count sites in different degenerate fold class
+        fold_num = codon_fold_dict[codon]
+        for i, f in enumerate(fold_num):
+            if f == "0":
+                fold0_cnt[codon[i]] += 1
+            elif f == "4":
+                fold4_cnt[codon[i]] += 1
+    f0_total = sum(fold0_cnt.values())
+    f4_total = sum(fold4_cnt.values())
+    # normalize each fold class over its own keys (the two dicts need not
+    # contain the same bases)
+    for i in fold0_cnt:
+        fold0_cnt[i] = fold0_cnt[i] / f0_total
+    for i in fold4_cnt:
+        fold4_cnt[i] = fold4_cnt[i] / f4_total
+    # TODO:
+    # the initial kappa is different from what yn00 gives,
+    # try to find the problem.
+    TV = _get_TV(seq1, seq2, codon_table=codon_table)
+    k04 = (_get_kappa_t(fold0_cnt, TV), _get_kappa_t(fold4_cnt, TV))
+    kappa = (f0_total * k04[0] + f4_total * k04[1]) / (f0_total + f4_total)
+    # kappa = 2.4285
+    # count synonymous sites and non-synonymous sites
+    for i in range(3):
+        tot = sum(fcodon[i].values())
+        fcodon[i] = {j: k / tot for j, k in fcodon[i].items()}
+    pi = defaultdict(int)
+    for i in list(codon_table.forward_table.keys()) + codon_table.stop_codons:
+        if "U" not in i:
+            pi[i] = 0
+    for i in seq1 + seq2:
+        pi[i] += 1
+    S_sites1, N_sites1, bfreqSN1 = _count_site_YN00(
+        seq1, seq2, pi, k=kappa, codon_table=codon_table
+    )
+    S_sites2, N_sites2, bfreqSN2 = _count_site_YN00(
+        seq2, seq1, pi, k=kappa, codon_table=codon_table
+    )
+    N_sites = (N_sites1 + N_sites2) / 2
+    S_sites = (S_sites1 + S_sites2) / 2
+    bfreqSN = [{"A": 0, "T": 0, "C": 0, "G": 0}, {"A": 0, "T": 0, "C": 0, "G": 0}]
+    for i in range(2):
+        for b in ("A", "T", "C", "G"):
+            bfreqSN[i][b] = (bfreqSN1[i][b] + bfreqSN2[i][b]) / 2
+    # use NG86 method to get initial t and w
+    SN = [0, 0]
+    for i, j in zip(seq1, seq2):
+        SN = [
+            m + n for m, n in zip(SN, _count_diff_NG86(i, j, codon_table=codon_table))
+        ]
+    ps = SN[0] / S_sites
+    pn = SN[1] / N_sites
+    p = sum(SN) / (S_sites + N_sites)
+    w = log(1 - 4.0 / 3 * pn) / log(1 - 4.0 / 3 * ps)
+    t = -3 / 4 * log(1 - 4 / 3 * p)
+    tolerance = 1e-5
+    dSdN_pre = [0, 0]
+    for temp in range(20):
+        # count synonymous and nonsynonymous differences under kappa, w, t
+        codon_lst = [
+            i
+            for i in list(codon_table.forward_table.keys()) + codon_table.stop_codons
+            if "U" not in i
+        ]
+        Q = _get_Q(pi, kappa, w, codon_lst, codon_table)
+        P = expm(Q * t)
+        TV = [0, 0, 0, 0]  # synonymous/nonsynonymous transition/transversion
+        codon_npath = {}
+        for i, j in zip(seq1, seq2):
+            if i != "---" and j != "---":
+                codon_npath.setdefault((i, j), 0)
+                codon_npath[(i, j)] += 1
+        for i in codon_npath:
+            tv = _count_diff_YN00(i[0], i[1], P, codon_lst, codon_table)
+            TV = [m + n * codon_npath[i] for m, n in zip(TV, tv)]
+        TV = (TV[0] / S_sites, TV[1] / S_sites), (TV[2] / N_sites, TV[3] / N_sites)
+        # following the DistanceF84() function of yn00.c in PAML,
+        # the t of eq. (10) in PMID 10666704 is evaluated here for dS and dN
+        dSdN = []
+        for f, tv in zip(bfreqSN, TV):
+            dSdN.append(_get_kappa_t(f, tv, t=True))
+        t = dSdN[0] * 3 * S_sites / (S_sites + N_sites) + dSdN[1] * 3 * N_sites / (
+            S_sites + N_sites
+        )
+        w = dSdN[1] / dSdN[0]
+        if all(abs(i - j) < tolerance for i, j in zip(dSdN, dSdN_pre)):
+            return dSdN[1], dSdN[0]  # dN, dS
+        dSdN_pre = dSdN
+
+
+def _get_TV(codon_lst1, codon_lst2, codon_table):
+    """Get TV (PRIVATE).
+
+    Arguments:
+     - T - proportions of transitional differences
+     - V - proportions of transversional differences
+
+    """
+    purine = ("A", "G")
+    pyrimidine = ("C", "T")
+    TV = [0, 0]
+    sites = 0
+    for codon1, codon2 in zip(codon_lst1, codon_lst2):
+        if "---" not in (codon1, codon2):
+            for i, j in zip(codon1, codon2):
+                if i == j:
+                    pass
+                elif i in purine and j in purine:
+                    TV[0] += 1
+                elif i in pyrimidine and j in pyrimidine:
+                    TV[0] += 1
+                else:
+                    TV[1] += 1
+                sites += 1
+    return (TV[0] / sites, TV[1] / sites)
+    # return (TV[0], TV[1])
+
+
+def _get_kappa_t(pi, TV, t=False):
+    """Calculate kappa (PRIVATE).
+
+    The following formula and variable names are according to PMID: 10666704
+    """
+    pi["Y"] = pi["T"] + pi["C"]
+    pi["R"] = pi["A"] + pi["G"]
+    A = (
+        2 * (pi["T"] * pi["C"] + pi["A"] * pi["G"])
+        + 2
+        * (
+            pi["T"] * pi["C"] * pi["R"] / pi["Y"]
+            + pi["A"] * pi["G"] * pi["Y"] / pi["R"]
+        )
+        * (1 - TV[1] / (2 * pi["Y"] * pi["R"]))
+        - TV[0]
+    ) / (2 * (pi["T"] * pi["C"] / pi["Y"] + pi["A"] * pi["G"] / pi["R"]))
+    B = 1 - TV[1] / (2 * pi["Y"] * pi["R"])
+    a = -0.5 * log(A)  # this seems to be an error in YANG's original paper
+    b = -0.5 * log(B)
+    kappaF84 = a / b - 1
+    if t is False:
+        kappaHKY85 = 1 + (
+            pi["T"] * pi["C"] / pi["Y"] + pi["A"] * pi["G"] / pi["R"]
+        ) * kappaF84 / (pi["T"] * pi["C"] + pi["A"] * pi["G"])
+        return kappaHKY85
+    else:
+        t = (
+            4 * pi["T"] * pi["C"] * (1 + kappaF84 / pi["Y"])
+            + 4 * pi["A"] * pi["G"] * (1 + kappaF84 / pi["R"])
+            + 4 * pi["Y"] * pi["R"]
+        ) * b
+        return t
+
+
+def _count_site_YN00(codon_lst1, codon_lst2, pi, k, codon_table):
+    """Site counting method from Ina / Yang and Nielsen (PRIVATE).
+
+    Method from `Ina (1995)`_ as modified by `Yang and Nielsen (2000)`_.
+    This will return the total number of synonymous and nonsynonymous sites
+    and base frequencies in each category. The function is equivalent to
+    the ``CountSites()`` function in ``yn00.c`` of PAML.
+
+    .. _`Ina (1995)`: https://doi.org/10.1007/BF00167113
+    .. _`Yang and Nielsen (2000)`: https://doi.org/10.1093/oxfordjournals.molbev.a026236
+
+    """
+    if len(codon_lst1) != len(codon_lst2):
+        raise RuntimeError(
+            "Length of two codon_lst should be the same (%d and %d detected)"
+            % (len(codon_lst1), len(codon_lst2))
+        )
+    else:
+        length = len(codon_lst1)
+    purine = ("A", "G")
+    pyrimidine = ("T", "C")
+    base_tuple = ("A", "T", "C", "G")
+    codon_dict = codon_table.forward_table
+    stop = codon_table.stop_codons
+    codon_npath = {}
+    for i, j in zip(codon_lst1, codon_lst2):
+        if i != "---" and j != "---":
+            codon_npath.setdefault((i, j), 0)
+            codon_npath[(i, j)] += 1
+    S_sites = N_sites = 0
+    freqSN = [
+        {"A": 0, "T": 0, "C": 0, "G": 0},  # synonymous
+        {"A": 0, "T": 0, "C": 0, "G": 0},
+    ]  # nonsynonymous
+    for codon_pair, npath in codon_npath.items():
+        codon = codon_pair[0]
+        S = N = 0
+        for pos in range(3):
+            for base in base_tuple:
+                if codon[pos] == base:
+                    continue
+                neighbor_codon = codon[:pos] + base + codon[pos + 1 :]
+                if neighbor_codon in stop:
+                    continue
+                weight = pi[neighbor_codon]
+                if codon[pos] in pyrimidine and base in pyrimidine:
+                    weight *= k
+                elif codon[pos] in purine and base in purine:
+                    weight *= k
+                if codon_dict[codon] == codon_dict[neighbor_codon]:
+                    S += weight
+                    freqSN[0][base] += weight * npath
+                else:
+                    N += weight
+                    freqSN[1][base] += weight * npath
+        S_sites += S * npath
+        N_sites += N * npath
+    norm_const = 3 * length / (S_sites + N_sites)
+    S_sites *= norm_const
+    N_sites *= norm_const
+    for i in freqSN:
+        norm_const = sum(i.values())
+        for b in i:
+            i[b] /= norm_const
+    return S_sites, N_sites, freqSN
+
+
+def _count_diff_YN00(codon1, codon2, P, codon_lst, codon_table):
+    """Count differences between two codons (three-letter string; PRIVATE).
+
+    The function weights the multiple pathways from codon1 to codon2
+    according to the P matrix of codon substitution. The proportions
+    of transitions and transversions (TV) are also calculated in
+    the function.
+    """
+    if not isinstance(codon1, str) or not isinstance(codon2, str):
+        raise TypeError(
+            "_count_diff_YN00 accepts string object to represent codon"
+            f" ({type(codon1)}, {type(codon2)} detected)"
+        )
+    if len(codon1) != 3 or len(codon2) != 3:
+        raise RuntimeError(
+            "codon should be three letter string"
+            f" ({len(codon1)}, {len(codon2)} detected)"
+        )
+    TV = [
+        0,
+        0,
+        0,
+        0,
+    ]  # transition and transversion counts (synonymous and nonsynonymous)
+    if codon1 == "---" or codon2 == "---":
+        return TV
+    base_tuple = ("A", "C", "G", "T")
+    if not all(i in base_tuple for i in codon1):
+        raise RuntimeError(
+            f"Unrecognized character detected in codon1 {codon1}"
+            " (Codons consist of A, T, C or G)"
+        )
+    if not all(i in base_tuple for i in codon2):
+        raise RuntimeError(
+            f"Unrecognized character detected in codon2 {codon2}"
+            " (Codons consist of A, T, C or G)"
+        )
+    if codon1 == codon2:
+        return TV
+    else:
+        diff_pos = []
+        for i, k in enumerate(zip(codon1, codon2)):
+            if k[0] != k[1]:
+                diff_pos.append(i)
+
+        def count_TV(codon1, codon2, diff, codon_table, weight=1):
+            purine = ("A", "G")
+            pyrimidine = ("T", "C")
+            dic = codon_table.forward_table
+            stop = codon_table.stop_codons
+            if codon1 in stop or codon2 in stop:
+                # stop codon is always considered as nonsynonymous
+                if codon1[diff] in purine and codon2[diff] in purine:
+                    return [0, 0, weight, 0]
+                elif codon1[diff] in pyrimidine and codon2[diff] in pyrimidine:
+                    return [0, 0, weight, 0]
+                else:
+                    return [0, 0, 0, weight]
+            elif dic[codon1] == dic[codon2]:
+                if codon1[diff] in purine and codon2[diff] in purine:
+                    return [weight, 0, 0, 0]
+                elif codon1[diff] in pyrimidine and codon2[diff] in pyrimidine:
+                    return [weight, 0, 0, 0]
+                else:
+                    return [0, weight, 0, 0]
+            else:
+                if codon1[diff] in purine and codon2[diff] in purine:
+                    return [0, 0, weight, 0]
+                elif codon1[diff] in pyrimidine and codon2[diff] in pyrimidine:
+                    return [0, 0, weight, 0]
+                else:
+                    return [0, 0, 0, weight]
+
+        if len(diff_pos) == 1:
+            TV = [
+                p + q
+                for p, q in zip(TV, count_TV(codon1, codon2, diff_pos[0], codon_table))
+            ]
+        elif len(diff_pos) == 2:
+            tmp_codon = [codon1[:i] + codon2[i] + codon1[i + 1 :] for i in diff_pos]
+            path_prob = []
+            for i in tmp_codon:
+                codon_idx = list(map(codon_lst.index, [codon1, i, codon2]))
+                prob = (P[codon_idx[0], codon_idx[1]], P[codon_idx[1], codon_idx[2]])
+                path_prob.append(prob[0] * prob[1])
+            path_prob = [2 * i / sum(path_prob) for i in path_prob]
+            for n, i in enumerate(diff_pos):
+                temp_codon = codon1[:i] + codon2[i] + codon1[i + 1 :]
+                TV = [
+                    p + q
+                    for p, q in zip(
+                        TV,
+                        count_TV(
+                            codon1, temp_codon, i, codon_table, weight=path_prob[n] / 2
+                        ),
+                    )
+                ]
+                # second leg of this pathway: temp_codon -> codon2 at the
+                # remaining differing position
+                other = [pos for pos in diff_pos if pos != i][0]
+                TV = [
+                    p + q
+                    for p, q in zip(
+                        TV,
+                        count_TV(
+                            temp_codon, codon2, other, codon_table,
+                            weight=path_prob[n] / 2,
+                        ),
+                    )
+                ]
+        elif len(diff_pos) == 3:
+            paths = list(permutations([0, 1, 2], 3))
+            path_prob = []
+            tmp_codon = []
+            for p in paths:
+                tmp1 = codon1[: p[0]] + codon2[p[0]] + codon1[p[0] + 1 :]
+                tmp2 = tmp1[: p[1]] + codon2[p[1]] + tmp1[p[1] + 1 :]
+                tmp_codon.append((tmp1, tmp2))
+                codon_idx = list(map(codon_lst.index, [codon1, tmp1, tmp2, codon2]))
+                prob = (
+                    P[codon_idx[0], codon_idx[1]],
+                    P[codon_idx[1], codon_idx[2]],
+                    P[codon_idx[2], codon_idx[3]],
+                )
+                path_prob.append(prob[0] * prob[1] * prob[2])
+            path_prob = [3 * i / sum(path_prob) for i in path_prob]
+            for i, j, k in zip(tmp_codon, path_prob, paths):
+                TV = [
+                    p + q
+                    for p, q in zip(
+                        TV, count_TV(codon1, i[0], k[0], codon_table, weight=j / 3)
+                    )
+                ]
+                TV = [
+                    p + q
+                    for p, q in zip(
+                        TV, count_TV(i[0], i[1], k[1], codon_table, weight=j / 3)
+                    )
+                ]
+                # the third leg changes the last remaining position, k[2]
+                TV = [
+                    p + q
+                    for p, q in zip(
+                        TV, count_TV(i[1], codon2, k[2], codon_table, weight=j / 3)
+                    )
+                ]
+    return TV
+
+
+#################################################################
+#        private functions for Maximum Likelihood method
+#################################################################
+
+
+def _ml(seq1, seq2, cmethod, codon_table):
+    """ML method main function (PRIVATE)."""
+    from collections import Counter
+    from scipy.optimize import minimize
+
+    codon_cnt = Counter()
+    pi = _get_pi(seq1, seq2, cmethod, codon_table=codon_table)
+    for i, j in zip(seq1, seq2):
+        # if i != j and ('---' not in (i, j)):
+        if "---" not in (i, j):
+            codon_cnt[(i, j)] += 1
+    codon_lst = [
+        i
+        for i in list(codon_table.forward_table.keys()) + codon_table.stop_codons
+        if "U" not in i
+    ]
+
+    # apply optimization
+    def func(
+        params, pi=pi, codon_cnt=codon_cnt, codon_lst=codon_lst, codon_table=codon_table
+    ):
+        """Temporary function, params = [t, k, w]."""
+        return -_likelihood_func(
+            params[0],
+            params[1],
+            params[2],
+            pi,
+            codon_cnt,
+            codon_lst=codon_lst,
+            codon_table=codon_table,
+        )
+
+    # count sites
+    opt_res = minimize(
+        func,
+        [1, 0.1, 2],
+        method="L-BFGS-B",
+        bounds=((1e-10, 20), (1e-10, 20), (1e-10, 10)),
+        tol=1e-5,
+    )
+    t, k, w = opt_res.x
+    Q = _get_Q(pi, k, w, codon_lst, codon_table)
+    Sd = Nd = 0
+    for i, c1 in enumerate(codon_lst):
+        for j, c2 in enumerate(codon_lst):
+            if i != j:
+                try:
+                    if codon_table.forward_table[c1] == codon_table.forward_table[c2]:
+                        # synonymous count
+                        Sd += pi[c1] * Q[i, j]
+                    else:
+                        # nonsynonymous count
+                        Nd += pi[c1] * Q[i, j]
+                except KeyError:
+                    # This is probably due to stop codons
+                    pass
+    Sd *= t
+    Nd *= t
+
+    # count differences (with w fixed to 1)
+    def func_w1(
+        params, pi=pi, codon_cnt=codon_cnt, codon_lst=codon_lst, codon_table=codon_table
+    ):
+        """Temporary function, params = [t, k]. w is fixed to 1."""
+        return -_likelihood_func(
+            params[0],
+            params[1],
+            1.0,
+            pi,
+            codon_cnt,
+            codon_lst=codon_lst,
+            codon_table=codon_table,
+        )
+
+    opt_res = minimize(
+        func_w1,
+        [1, 0.1],
+        method="L-BFGS-B",
+        bounds=((1e-10, 20), (1e-10, 20)),
+        tol=1e-5,
+    )
+    t, k = opt_res.x
+    w = 1.0
+    Q = _get_Q(pi, k, w, codon_lst, codon_table)
+    rhoS = rhoN = 0
+    for i, c1 in enumerate(codon_lst):
+        for j, c2 in enumerate(codon_lst):
+            if i != j:
+                try:
+                    if codon_table.forward_table[c1] == codon_table.forward_table[c2]:
+                        # synonymous count
+                        rhoS += pi[c1] * Q[i, j]
+                    else:
+                        # nonsynonymous count
+                        rhoN += pi[c1] * Q[i, j]
+                except KeyError:
+                    # This is probably due to stop codons
+                    pass
+    rhoS *= 3
+    rhoN *= 3
+    dN = Nd / rhoN
+    dS = Sd / rhoS
+    return dN, dS
+
+
+def _get_pi(seq1, seq2, cmethod, codon_table):
+    """Obtain codon frequency dict (pi) from two codon list (PRIVATE).
+
+    This function is designed for the ML method. Available counting methods
+    (cfreq) are F1x4, F3x4 and F61.
+    """
+    # TODO:
+    # Stop codon should not be allowed according to Yang.
+    # Try to modify this!
+    pi = {}
+    if cmethod == "F1x4":
+        fcodon = {"A": 0, "G": 0, "C": 0, "T": 0}
+        for i in seq1 + seq2:
+            if i != "---":
+                for c in i:
+                    fcodon[c] += 1
+        tot = sum(fcodon.values())
+        fcodon = {j: k / tot for j, k in fcodon.items()}
+        for i in list(codon_table.forward_table.keys()) + codon_table.stop_codons:
+            if "U" not in i:
+                pi[i] = fcodon[i[0]] * fcodon[i[1]] * fcodon[i[2]]
+    elif cmethod == "F3x4":
+        # three codon position
+        fcodon = [
+            {"A": 0, "G": 0, "C": 0, "T": 0},
+            {"A": 0, "G": 0, "C": 0, "T": 0},
+            {"A": 0, "G": 0, "C": 0, "T": 0},
+        ]
+        for i in seq1 + seq2:
+            if i != "---":
+                fcodon[0][i[0]] += 1
+                fcodon[1][i[1]] += 1
+                fcodon[2][i[2]] += 1
+        for i in range(3):
+            tot = sum(fcodon[i].values())
+            fcodon[i] = {j: k / tot for j, k in fcodon[i].items()}
+        for i in list(codon_table.forward_table.keys()) + codon_table.stop_codons:
+            if "U" not in i:
+                pi[i] = fcodon[0][i[0]] * fcodon[1][i[1]] * fcodon[2][i[2]]
+    elif cmethod == "F61":
+        for i in list(codon_table.forward_table.keys()) + codon_table.stop_codons:
+            if "U" not in i:
+                pi[i] = 0.1
+        for i in seq1 + seq2:
+            if i != "---":
+                pi[i] += 1
+        tot = sum(pi.values())
+        pi = {j: k / tot for j, k in pi.items()}
+    return pi
+
+
+def _q(i, j, pi, k, w, codon_table):
+    """Q matrix for codon substitution (PRIVATE).
+
+    Arguments:
+     - i, j  : three letter codon string
+     - pi    : expected codon frequency
+     - k     : transition/transversion ratio
+     - w     : nonsynonymous/synonymous rate ratio
+     - codon_table: Bio.Data.CodonTable object
+
+    """
+    if i == j:
+        # diagonal elements are filled in later, in _get_Q, as the negative
+        # sum of the other elements in the row
+        return 0
+    if i in codon_table.stop_codons or j in codon_table.stop_codons:
+        return 0
+    if (i not in pi) or (j not in pi):
+        return 0
+    purine = ("A", "G")
+    pyrimidine = ("T", "C")
+    diff = []
+    for n, (c1, c2) in enumerate(zip(i, j)):
+        if c1 != c2:
+            diff.append((n, c1, c2))
+    if len(diff) >= 2:
+        return 0
+    if codon_table.forward_table[i] == codon_table.forward_table[j]:
+        # synonymous substitution
+        if diff[0][1] in purine and diff[0][2] in purine:
+            # transition
+            return k * pi[j]
+        elif diff[0][1] in pyrimidine and diff[0][2] in pyrimidine:
+            # transition
+            return k * pi[j]
+        else:
+            # transversion
+            return pi[j]
+    else:
+        # nonsynonymous substitution
+        if diff[0][1] in purine and diff[0][2] in purine:
+            # transition
+            return w * k * pi[j]
+        elif diff[0][1] in pyrimidine and diff[0][2] in pyrimidine:
+            # transition
+            return w * k * pi[j]
+        else:
+            # transversion
+            return w * pi[j]
+
+
+def _get_Q(pi, k, w, codon_lst, codon_table):
+    """Q matrix for codon substitution (PRIVATE)."""
+    import numpy as np
+
+    codon_num = len(codon_lst)
+    Q = np.zeros((codon_num, codon_num))
+    for i in range(codon_num):
+        for j in range(codon_num):
+            if i != j:
+                Q[i, j] = _q(
+                    codon_lst[i], codon_lst[j], pi, k, w, codon_table=codon_table
+                )
+    nucl_substitutions = 0
+    for i in range(codon_num):
+        Q[i, i] = -sum(Q[i, :])
+        try:
+            nucl_substitutions += pi[codon_lst[i]] * (-Q[i, i])
+        except KeyError:
+            pass
+    Q = Q / nucl_substitutions
+    return Q
+
+
+def _likelihood_func(t, k, w, pi, codon_cnt, codon_lst, codon_table):
+    """Likelihood function for ML method (PRIVATE)."""
+    from scipy.linalg import expm
+
+    Q = _get_Q(pi, k, w, codon_lst, codon_table)
+    P = expm(Q * t)
+    likelihood = 0
+    for i, c1 in enumerate(codon_lst):
+        for j, c2 in enumerate(codon_lst):
+            if (c1, c2) in codon_cnt:
+                if P[i, j] * pi[c1] <= 0:
+                    # log of a non-positive number is undefined; such pairs
+                    # contribute nothing to the log-likelihood
+                    continue
+                likelihood += codon_cnt[(c1, c2)] * log(pi[c1] * P[i, j])
+    return likelihood
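+# The quantity returned above is the log-likelihood
+#     l(t, k, w) = sum over codon pairs (c1, c2) of n(c1, c2) * log(pi[c1] * P[c1, c2])
+# where n counts aligned codon pairs and P = expm(Q * t).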
+
+
+if __name__ == "__main__":
+    from Bio._utils import run_doctest
+
+    run_doctest()
diff --git a/code/lib/Bio/cpairwise2.cp37-win_amd64.pyd b/code/lib/Bio/cpairwise2.cp37-win_amd64.pyd
new file mode 100644
index 0000000..6826be4
Binary files /dev/null and b/code/lib/Bio/cpairwise2.cp37-win_amd64.pyd differ
diff --git a/code/lib/Bio/cpairwise2module.c b/code/lib/Bio/cpairwise2module.c
new file mode 100644
index 0000000..af7843e
--- /dev/null
+++ b/code/lib/Bio/cpairwise2module.c
@@ -0,0 +1,479 @@
+/* Copyright 2002 by Jeffrey Chang.
+ * Copyright 2016, 2019 by Markus Piotrowski.
+ * All rights reserved.
+ *
+ * This file is part of the Biopython distribution and governed by your
+ * choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+ * Please see the LICENSE file that should have been included as part of this
+ * package.
+ *
+ * cpairwise2module.c
+ * Created 30 Sep 2001
+ *
+ * Optimized C routines that complement pairwise2.py.
+ */
+
+#include "Python.h"
+
+
+#define _PRECISION 1000
+#define rint(x) (int)((x)*_PRECISION+0.5)
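+/* e.g. rint(1.2345) == 1235: scores are compared at three-decimal precision. */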
+
+/* Functions in this module. */
+
+static double calc_affine_penalty(int length, double open, double extend,
+    int penalize_extend_when_opening)
+{
+    double penalty;
+
+    if(length <= 0)
+        return 0.0;
+    penalty = open + extend * length;
+    if(!penalize_extend_when_opening)
+        penalty -= extend;
+    return penalty;
+}
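+
+/* Example: open=-2, extend=-0.5, length=3 yields -3.5; with
+ * penalize_extend_when_opening false it yields -3.0, i.e. opening a gap
+ * does not additionally pay the extension penalty. */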
+
+static double _get_match_score(PyObject *py_sequenceA, PyObject *py_sequenceB,
+                               PyObject *py_match_fn, int i, int j,
+                               char *sequenceA, char *sequenceB,
+                               int use_sequence_cstring,
+                               double match, double mismatch,
+                               int use_match_mismatch_scores)
+{
+    PyObject *py_A=NULL, *py_B=NULL;
+    PyObject *py_arglist=NULL, *py_result=NULL;
+    double score = 0;
+
+    if(use_sequence_cstring && use_match_mismatch_scores) {
+        score = (sequenceA[i] == sequenceB[j]) ? match : mismatch;
+        return score;
+    }
+    /* Calculate the match score. */
+    if(!(py_A = PySequence_GetItem(py_sequenceA, i)))
+        goto _get_match_score_cleanup;
+    if(!(py_B = PySequence_GetItem(py_sequenceB, j)))
+        goto _get_match_score_cleanup;
+    if(!(py_arglist = Py_BuildValue("(OO)", py_A, py_B)))
+        goto _get_match_score_cleanup;
+
+    if(!(py_result = PyEval_CallObject(py_match_fn, py_arglist)))
+        goto _get_match_score_cleanup;
+    score = PyFloat_AsDouble(py_result);
+
+ _get_match_score_cleanup:
+    if(py_A) {
+        Py_DECREF(py_A);
+    }
+    if(py_B) {
+        Py_DECREF(py_B);
+    }
+    if(py_arglist) {
+        Py_DECREF(py_arglist);
+    }
+    if(py_result) {
+        Py_DECREF(py_result);
+    }
+    return score;
+}
+
+#if PY_MAJOR_VERSION >= 3
+static PyObject* _create_bytes_object(PyObject* o)
+{
+    PyObject* b;
+    if (PyBytes_Check(o)) {
+        return o;
+    }
+    if (!PyUnicode_Check(o)) {
+        return NULL;
+    }
+    b = PyUnicode_AsASCIIString(o);
+    if (!b) {
+        PyErr_Clear();
+        return NULL;
+    }
+    return b;
+}
+#endif
+
+/* This function is a more-or-less straightforward port of the
+ * equivalent function in pairwise2. Please see there for algorithm
+ * documentation.
+ */
+static PyObject *cpairwise2__make_score_matrix_fast(PyObject *self,
+                                                    PyObject *args)
+{
+    int i;
+    int row, col;
+    PyObject *py_sequenceA, *py_sequenceB, *py_match_fn;
+#if PY_MAJOR_VERSION >= 3
+    PyObject *py_bytesA, *py_bytesB;
+#endif
+    char *sequenceA=NULL, *sequenceB=NULL;
+    int use_sequence_cstring;
+    double open_A, extend_A, open_B, extend_B;
+    int penalize_extend_when_opening, penalize_end_gaps_A, penalize_end_gaps_B;
+    int align_globally, score_only;
+
+    PyObject *py_match=NULL, *py_mismatch=NULL;
+    double first_A_gap, first_B_gap;
+    double match, mismatch;
+    double score;
+    double best_score = 0;
+    double local_max_score = 0;
+    int use_match_mismatch_scores;
+    int lenA, lenB;
+    double *score_matrix = NULL;
+    unsigned char *trace_matrix = NULL;
+    PyObject *py_score_matrix=NULL, *py_trace_matrix=NULL;
+
+    double *col_cache_score = NULL;
+    PyObject *py_retval = NULL;
+
+    if(!PyArg_ParseTuple(args, "OOOddddi(ii)ii", &py_sequenceA, &py_sequenceB,
+                         &py_match_fn, &open_A, &extend_A, &open_B, &extend_B,
+                         &penalize_extend_when_opening,
+                         &penalize_end_gaps_A, &penalize_end_gaps_B,
+                         &align_globally, &score_only))
+        return NULL;
+    if(!PySequence_Check(py_sequenceA) || !PySequence_Check(py_sequenceB)) {
+        PyErr_SetString(PyExc_TypeError,
+                        "py_sequenceA and py_sequenceB should be sequences.");
+        return NULL;
+    }
+
+    /* Optimize for the common case. Check to see if py_sequenceA and
+       py_sequenceB are strings.  If they are, use the c string
+       representation. */
+#if PY_MAJOR_VERSION < 3
+    use_sequence_cstring = 0;
+    if(PyString_Check(py_sequenceA) && PyString_Check(py_sequenceB)) {
+        sequenceA = PyString_AS_STRING(py_sequenceA);
+        sequenceB = PyString_AS_STRING(py_sequenceB);
+        use_sequence_cstring = 1;
+    }
+#else
+    py_bytesA = _create_bytes_object(py_sequenceA);
+    py_bytesB = _create_bytes_object(py_sequenceB);
+    if (py_bytesA && py_bytesB) {
+        sequenceA = PyBytes_AS_STRING(py_bytesA);
+        sequenceB = PyBytes_AS_STRING(py_bytesB);
+        use_sequence_cstring = 1;
+    }
+    else {
+        Py_XDECREF(py_bytesA);
+        Py_XDECREF(py_bytesB);
+        py_bytesA = py_bytesB = NULL;  /* avoid a double DECREF in cleanup */
+        use_sequence_cstring = 0;
+    }
+#endif
+
+    if(!PyCallable_Check(py_match_fn)) {
+        PyErr_SetString(PyExc_TypeError, "py_match_fn must be callable.");
+        return NULL;
+    }
+    /* Optimize for the common case. Check to see if py_match_fn is
+       an identity_match. If so, pull out the match and mismatch
+       member variables and calculate the scores myself. */
+    match = mismatch = 0;
+    use_match_mismatch_scores = 0;
+    if(!(py_match = PyObject_GetAttrString(py_match_fn, "match")))
+        goto cleanup_after_py_match_fn;
+    match = PyFloat_AsDouble(py_match);
+    if(match==-1.0 && PyErr_Occurred())
+        goto cleanup_after_py_match_fn;
+    if(!(py_mismatch = PyObject_GetAttrString(py_match_fn, "mismatch")))
+        goto cleanup_after_py_match_fn;
+    mismatch = PyFloat_AsDouble(py_mismatch);
+    if(mismatch==-1.0 && PyErr_Occurred())
+        goto cleanup_after_py_match_fn;
+    use_match_mismatch_scores = 1;
+
+ cleanup_after_py_match_fn:
+    if(PyErr_Occurred())
+        PyErr_Clear();
+    if(py_match) {
+        Py_DECREF(py_match);
+    }
+    if(py_mismatch) {
+        Py_DECREF(py_mismatch);
+    }
+    /* Cache some commonly used gap penalties */
+    first_A_gap = calc_affine_penalty(1, open_A, extend_A,
+                                      penalize_extend_when_opening);
+    first_B_gap = calc_affine_penalty(1, open_B, extend_B,
+                                      penalize_extend_when_opening);
+
+    /* Allocate matrices for storing the results and initialize first row and col. */
+    lenA = PySequence_Length(py_sequenceA);
+    lenB = PySequence_Length(py_sequenceB);
+    score_matrix = malloc((lenA+1)*(lenB+1)*sizeof(*score_matrix));
+    if(!score_matrix) {
+        PyErr_SetString(PyExc_MemoryError, "Out of memory");
+        goto _cleanup_make_score_matrix_fast;
+    }
+    for(i=0; i<(lenB+1); i++)
+        score_matrix[i] = 0;
+    for(i=0; i<(lenA+1)*(lenB+1); i += (lenB+1))
+        score_matrix[i] = 0;
+    /* If we only want the score, we don't need the trace matrix. */
+    if (!score_only){
+        trace_matrix = malloc((lenA+1)*(lenB+1)*sizeof(*trace_matrix));
+        if(!trace_matrix) {
+            PyErr_SetString(PyExc_MemoryError, "Out of memory");
+            goto _cleanup_make_score_matrix_fast;
+        }
+        for(i=0; i<(lenB+1); i++)
+            trace_matrix[i] = 0;
+        for(i=0; i<(lenA+1)*(lenB+1); i += (lenB+1))
+            trace_matrix[i] = 0;
+    }
+    else
+        trace_matrix = malloc(1);
+
+    /* Initialize the first row and col of the score matrix. */
+    for(i=0; i<=lenA; i++) {
+        if(penalize_end_gaps_B)
+            score = calc_affine_penalty(i, open_B, extend_B,
+                                        penalize_extend_when_opening);
+        else
+            score = 0;
+        score_matrix[i*(lenB+1)] = score;
+    }
+    for(i=0; i<=lenB; i++) {
+        if(penalize_end_gaps_A)
+            score = calc_affine_penalty(i, open_A, extend_A,
+                                        penalize_extend_when_opening);
+        else
+            score = 0;
+        score_matrix[i] = score;
+    }
+
+    /* Now initialize the col cache. */
+    col_cache_score = malloc((lenB+1)*sizeof(*col_cache_score));
+    memset((void *)col_cache_score, 0, (lenB+1)*sizeof(*col_cache_score));
+    for(i=0; i<=lenB; i++) {
+        col_cache_score[i] = calc_affine_penalty(i, (2*open_B), extend_B,
+                             penalize_extend_when_opening);
+    }
+
+    /* Fill in the score matrix. The row cache is calculated on the fly.*/
+    for(row=1; row<=lenA; row++) {
+        double row_cache_score = calc_affine_penalty(row, (2*open_A), extend_A,
+                                 penalize_extend_when_opening);
+        for(col=1; col<=lenB; col++) {
+            double match_score, nogap_score;
+            double row_open, row_extend, col_open, col_extend;
+            int best_score_rint, row_score_rint, col_score_rint;
+            unsigned char row_trace_score, col_trace_score, trace_score;
+
+            /* Calculate the best score. */
+            match_score = _get_match_score(py_sequenceA, py_sequenceB,
+                                           py_match_fn, row-1, col-1,
+                                           sequenceA, sequenceB,
+                                           use_sequence_cstring,
+                                           match, mismatch,
+                                           use_match_mismatch_scores);
+            if(match_score==-1.0 && PyErr_Occurred())
+                goto _cleanup_make_score_matrix_fast;
+            nogap_score = score_matrix[(row-1)*(lenB+1)+col-1] + match_score;
+
+            if (!penalize_end_gaps_A && row==lenA) {
+                row_open = score_matrix[(row)*(lenB+1)+col-1];
+                row_extend = row_cache_score;
+            }
+            else {
+                row_open = score_matrix[(row)*(lenB+1)+col-1] + first_A_gap;
+                row_extend = row_cache_score + extend_A;
+            }
+            row_cache_score = (row_open > row_extend) ? row_open : row_extend;
+
+            if (!penalize_end_gaps_B && col==lenB){
+                col_open = score_matrix[(row-1)*(lenB+1)+col];
+                col_extend = col_cache_score[col];
+            }
+            else {
+                col_open = score_matrix[(row-1)*(lenB+1)+col] + first_B_gap;
+                col_extend = col_cache_score[col] + extend_B;
+            }
+            col_cache_score[col] = (col_open > col_extend) ? col_open : col_extend;
+
+            best_score = (row_cache_score > col_cache_score[col]) ? row_cache_score : col_cache_score[col];
+            if(nogap_score > best_score)
+                best_score = nogap_score;
+
+            if (best_score > local_max_score)
+                local_max_score = best_score;
+
+            if(!align_globally && best_score < 0)
+                score_matrix[row*(lenB+1)+col] = 0;
+            else
+                score_matrix[row*(lenB+1)+col] = best_score;
+
+            if (!score_only) {
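+                /* Encode the traceback as bit flags: 1 = row open, 8 = row
+                 * extend, 4 = column open, 16 = column extend, 2 = diagonal
+                 * (no-gap) step; the flags are decoded in pairwise2.py. */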
+                row_score_rint = rint(row_cache_score);
+                col_score_rint = rint(col_cache_score[col]);
+                row_trace_score = 0;
+                col_trace_score = 0;
+                if (rint(row_open) == row_score_rint)
+                    row_trace_score = row_trace_score|1;
+                if (rint(row_extend) == row_score_rint)
+                    row_trace_score = row_trace_score|8;
+                if (rint(col_open) == col_score_rint)
+                    col_trace_score = col_trace_score|4;
+                if (rint(col_extend) == col_score_rint)
+                    col_trace_score = col_trace_score|16;
+
+                trace_score = 0;
+                best_score_rint = rint(best_score);
+                if (rint(nogap_score) == best_score_rint)
+                    trace_score = trace_score|2;
+                if (row_score_rint == best_score_rint)
+                    trace_score += row_trace_score;
+                if (col_score_rint == best_score_rint)
+                    trace_score += col_trace_score;
+                trace_matrix[row*(lenB+1)+col] = trace_score;
+            }
+        }
+    }
+
+    if (!align_globally)
+        best_score = local_max_score;
+
+    /* Save the score and traceback matrices into real python objects. */
+    if(!score_only) {
+        if(!(py_score_matrix = PyList_New(lenA+1)))
+            goto _cleanup_make_score_matrix_fast;
+        if(!(py_trace_matrix = PyList_New(lenA+1)))
+            goto _cleanup_make_score_matrix_fast;
+
+        for(row=0; row<=lenA; row++) {
+            PyObject *py_score_row, *py_trace_row;
+            if(!(py_score_row = PyList_New(lenB+1)))
+                goto _cleanup_make_score_matrix_fast;
+            PyList_SET_ITEM(py_score_matrix, row, py_score_row);
+            if(!(py_trace_row = PyList_New(lenB+1)))
+                goto _cleanup_make_score_matrix_fast;
+            PyList_SET_ITEM(py_trace_matrix, row, py_trace_row);
+
+            for(col=0; col<=lenB; col++) {
+                PyObject *py_score, *py_trace;
+                int offset = row*(lenB+1) + col;
+
+                /* Set py_score_matrix[row][col] to the score. */
+                if(!(py_score = PyFloat_FromDouble(score_matrix[offset])))
+                    goto _cleanup_make_score_matrix_fast;
+                PyList_SET_ITEM(py_score_row, col, py_score);
+
+                /* Set py_trace_matrix[row][col] to the trace value.  On
+                   the edges of the matrix (row or column is 0), the
+                   entry should be None. */
+                if(!row || !col) {
+                    Py_INCREF(Py_None);
+                    PyList_SET_ITEM(py_trace_row, col, Py_None);
+                }
+                else {
+                    if(!(py_trace = Py_BuildValue("B", trace_matrix[offset])))
+                        goto _cleanup_make_score_matrix_fast;
+                    PyList_SET_ITEM(py_trace_row, col, py_trace);
+                }
+            }
+        }
+    }
+    else {
+        py_score_matrix = PyList_New(1);
+        py_trace_matrix = PyList_New(1);
+    }
+    py_retval = Py_BuildValue("(OOd)", py_score_matrix, py_trace_matrix, best_score);
+
+ _cleanup_make_score_matrix_fast:
+    if(score_matrix)
+        free(score_matrix);
+    if(trace_matrix)
+        free(trace_matrix);
+    if(col_cache_score)
+        free(col_cache_score);
+    if(py_score_matrix){
+        Py_DECREF(py_score_matrix);
+    }
+    if(py_trace_matrix){
+        Py_DECREF(py_trace_matrix);
+    }
+
+#if PY_MAJOR_VERSION >= 3
+    if (py_bytesA != NULL && py_bytesA != py_sequenceA) Py_DECREF(py_bytesA);
+    if (py_bytesB != NULL && py_bytesB != py_sequenceB) Py_DECREF(py_bytesB);
+#endif
+
+    return py_retval;
+}
+
+static PyObject *cpairwise2_rint(PyObject *self, PyObject *args,
+                                 PyObject *keywds)
+{
+    double x;
+    int precision = _PRECISION;
+    int rint_x;
+
+    static char *kwlist[] = {"x", "precision", NULL};
+
+    if(!PyArg_ParseTupleAndKeywords(args, keywds, "d|l", kwlist,
+                                    &x, &precision))
+        return NULL;
+    rint_x = (int)(x * precision + 0.5);
+#if PY_MAJOR_VERSION >= 3
+    return PyLong_FromLong((long)rint_x);
+#else
+    return PyInt_FromLong((long)rint_x);
+#endif
+}
+
+/* Module definition stuff */
+
+static PyMethodDef cpairwise2Methods[] = {
+    {"_make_score_matrix_fast",
+     (PyCFunction)cpairwise2__make_score_matrix_fast, METH_VARARGS, ""},
+    {"rint", (PyCFunction)cpairwise2_rint, METH_VARARGS|METH_KEYWORDS, ""},
+    {NULL, NULL, 0, NULL}
+};
+
+static char cpairwise2__doc__[] =
+"Optimized C routines that complement pairwise2.py. These are called from within pairwise2.py.\n\
+\n\
+";
+
+#if PY_MAJOR_VERSION >= 3
+
+static struct PyModuleDef moduledef = {
+        PyModuleDef_HEAD_INIT,
+        "cpairwise2",
+        cpairwise2__doc__,
+        -1,
+        cpairwise2Methods,
+        NULL,
+        NULL,
+        NULL,
+        NULL
+};
+
+PyObject *
+PyInit_cpairwise2(void)
+
+#else
+
+void
+/* for Windows: _declspec(dllexport) initcpairwise2(void) */
+initcpairwise2(void)
+#endif
+
+{
+#if PY_MAJOR_VERSION >= 3
+    PyObject* module = PyModule_Create(&moduledef);
+    if (module==NULL) return NULL;
+    return module;
+#else
+    (void) Py_InitModule3("cpairwise2", cpairwise2Methods, cpairwise2__doc__);
+#endif
+}
diff --git a/code/lib/Bio/kNN.py b/code/lib/Bio/kNN.py
new file mode 100644
index 0000000..b3e6260
--- /dev/null
+++ b/code/lib/Bio/kNN.py
@@ -0,0 +1,138 @@
+# Copyright 2002 by Jeffrey Chang.
+# All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Code for doing k-nearest-neighbors classification.
+
+k Nearest Neighbors is a supervised learning algorithm that classifies
+a new observation based on the classes in its surrounding neighborhood.
+
+Glossary:
+ - distance   The distance between two points in the feature space.
+ - weight     The importance given to each point for classification.
+
+Classes:
+ - kNN           Holds information for a nearest neighbors classifier.
+
+
+Functions:
+ - train        Train a new kNN classifier.
+ - calculate    Calculate the probabilities of each class, given an observation.
+ - classify     Classify an observation into a class.
+
+Weighting Functions:
+ - equal_weight    Every example is given a weight of 1.
+
+"""
+
+import numpy
+
+
+class kNN:
+    """Holds information necessary to do nearest neighbors classification.
+
+    Attributes:
+     - classes  Set of the possible classes.
+     - xs       List of the neighbors.
+     - ys       List of the classes that the neighbors belong to.
+     - k        Number of neighbors to look at.
+    """
+
+    def __init__(self):
+        """Initialize the class."""
+        self.classes = set()
+        self.xs = []
+        self.ys = []
+        self.k = None
+
+
+def equal_weight(x, y):
+    """Return integer one (dummy method for equally weighting)."""
+    # everything gets 1 vote
+    return 1
+
+
+def train(xs, ys, k, typecode=None):
+    """Train a k nearest neighbors classifier on a training set.
+
+    xs is a list of observations and ys is a list of the class assignments.
+    Thus, xs and ys should contain the same number of elements.  k is
+    the number of neighbors that should be examined when doing the
+    classification.
+    """
+    knn = kNN()
+    knn.classes = set(ys)
+    knn.xs = numpy.asarray(xs, typecode)
+    knn.ys = ys
+    knn.k = k
+    return knn
+
+
+def calculate(knn, x, weight_fn=None, distance_fn=None):
+    """Calculate the probability for each class.
+
+    Arguments:
+     - x is the observed data.
+     - weight_fn is an optional function that takes x and a training
+       example, and returns a weight.
+     - distance_fn is an optional function that takes two points and
+       returns the distance between them.  If distance_fn is None (the
+       default), the Euclidean distance is used.
+
+    Returns a dictionary mapping each class to its accumulated weight.
+    """
+    if weight_fn is None:
+        weight_fn = equal_weight
+
+    x = numpy.asarray(x)
+
+    order = []  # list of (distance, index)
+    if distance_fn:
+        for i in range(len(knn.xs)):
+            dist = distance_fn(x, knn.xs[i])
+            order.append((dist, i))
+    else:
+        # Default: Use a fast implementation of the Euclidean distance
+        temp = numpy.zeros(len(x))
+        # Predefining temp allows reuse of this array, making this
+        # function about twice as fast.
+        for i in range(len(knn.xs)):
+            temp[:] = x - knn.xs[i]
+            dist = numpy.sqrt(numpy.dot(temp, temp))
+            order.append((dist, i))
+    order.sort()
+
+    # first 'k' are the ones I want.
+    weights = {}  # class -> number of votes
+    for k in knn.classes:
+        weights[k] = 0.0
+    for dist, i in order[: knn.k]:
+        klass = knn.ys[i]
+        weights[klass] = weights[klass] + weight_fn(x, knn.xs[i])
+
+    return weights
+
+
+def classify(knn, x, weight_fn=None, distance_fn=None):
+    """Classify an observation into a class.
+
+    If not specified, weight_fn will give all neighbors equal weight.
+    distance_fn is an optional function that takes two points and returns
+    the distance between them.  If distance_fn is None (the default),
+    the Euclidean distance is used.
+    """
+    if weight_fn is None:
+        weight_fn = equal_weight
+
+    weights = calculate(knn, x, weight_fn=weight_fn, distance_fn=distance_fn)
+
+    most_class = None
+    most_weight = None
+    for klass, weight in weights.items():
+        if most_class is None or weight > most_weight:
+            most_class = klass
+            most_weight = weight
+    return most_class
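+
+# A minimal usage sketch (illustrative only; the data points below are made
+# up and this block is not part of the original module):
+#
+#     >>> xs = [[1.0, 2.0], [2.0, 1.0], [8.0, 9.0], [9.0, 8.0]]
+#     >>> ys = [0, 0, 1, 1]
+#     >>> knn = train(xs, ys, k=3)
+#     >>> calculate(knn, [1.5, 1.5])   # votes per class among the 3 nearest
+#     {0: 2.0, 1: 1.0}
+#     >>> classify(knn, [1.5, 1.5])
+#     0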
diff --git a/code/lib/Bio/motifs/__init__.py b/code/lib/Bio/motifs/__init__.py
new file mode 100644
index 0000000..1dad775
--- /dev/null
+++ b/code/lib/Bio/motifs/__init__.py
@@ -0,0 +1,610 @@
+# Copyright 2003-2009 by Bartek Wilczynski.  All rights reserved.
+# Copyright 2012-2013 by Michiel JL de Hoon.  All rights reserved.
+# Revisions copyright 2019 by Victor Lin.  All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+
+"""Tools for sequence motif analysis.
+
+Bio.motifs contains the core Motif class containing various I/O methods
+as well as methods for motif comparisons and motif searching in sequences.
+It also includes functionality for parsing output from the AlignACE, MEME,
+and MAST programs, as well as files in the TRANSFAC format.
+"""
+
+import warnings
+
+from urllib.parse import urlencode
+from urllib.request import urlopen, Request
+
+
+def create(instances, alphabet="ACGT"):
+    """Create a Motif object."""
+    instances = Instances(instances, alphabet)
+    return Motif(instances=instances, alphabet=alphabet)
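+
+# For example (illustrative sketch; the sequences are made up):
+#
+#     >>> m = create(["TACAA", "TACGC", "TACAC"])
+#     >>> print(m.consensus)
+#     TACAC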
+
+
+def parse(handle, fmt, strict=True):
+    """Parse an output file from a motif finding program.
+
+    Currently supported formats (case is ignored):
+     - AlignAce:         AlignAce output file format
+     - ClusterBuster:    Cluster Buster position frequency matrix format
+     - XMS:              XMS matrix format
+     - MEME:             MEME output file motif
+     - MINIMAL:          MINIMAL MEME output file motif
+     - MAST:             MAST output file motif
+     - TRANSFAC:         TRANSFAC database file format
+     - pfm-four-columns: Generic position-frequency matrix format with four columns. (cisbp, homer, hocomoco, neph, tiffin)
+     - pfm-four-rows:    Generic position-frequency matrix format with four rows. (scertf, yetfasco, hdpi, idmmpmm, flyfactor survey)
+     - pfm:              JASPAR-style position-frequency matrix
+     - jaspar:           JASPAR-style multiple PFM format
+     - sites:            JASPAR-style sites file
+
+    As files in the pfm and sites formats contain only a single motif,
+    it is easier to use Bio.motifs.read() instead of Bio.motifs.parse()
+    for those.
+
+    For example:
+
+    >>> from Bio import motifs
+    >>> with open("motifs/alignace.out") as handle:
+    ...     for m in motifs.parse(handle, "AlignAce"):
+    ...         print(m.consensus)
+    ...
+    TCTACGATTGAG
+    CTGCACCTAGCTACGAGTGAG
+    GTGCCCTAAGCATACTAGGCG
+    GCCACTAGCAGAGCAGGGGGC
+    CGACTCAGAGGTT
+    CCACGCTAAGAGAAGTGCCGGAG
+    GCACGTCCCTGAGCA
+    GTCCATCGCAAAGCGTGGGGC
+    GAGATCAGAGGGCCG
+    TGGACGCGGGG
+    GACCAGAGCCTCGCATGGGGG
+    AGCGCGCGTG
+    GCCGGTTGCTGTTCATTAGG
+    ACCGACGGCAGCTAAAAGGG
+    GACGCCGGGGAT
+    CGACTCGCGCTTACAAGG
+
+    If strict is True (default), the parser will raise a ValueError if the
+    file contents do not strictly comply with the specified file format.
+    """
+    fmt = fmt.lower()
+    if fmt == "alignace":
+        from Bio.motifs import alignace
+
+        return alignace.read(handle)
+    elif fmt == "meme":
+        from Bio.motifs import meme
+
+        return meme.read(handle)
+    elif fmt == "minimal":
+        from Bio.motifs import minimal
+
+        return minimal.read(handle)
+    elif fmt == "clusterbuster":
+        from Bio.motifs import clusterbuster
+
+        return clusterbuster.read(handle)
+    elif fmt in ("pfm-four-columns", "pfm-four-rows"):
+        from Bio.motifs import pfm
+
+        return pfm.read(handle, fmt)
+    elif fmt == "xms":
+        from Bio.motifs import xms
+
+        return xms.read(handle)
+    elif fmt == "mast":
+        from Bio.motifs import mast
+
+        return mast.read(handle)
+    elif fmt == "transfac":
+        from Bio.motifs import transfac
+
+        return transfac.read(handle, strict)
+    elif fmt in ("pfm", "sites", "jaspar"):
+        from Bio.motifs import jaspar
+
+        return jaspar.read(handle, fmt)
+    else:
+        raise ValueError("Unknown format %s" % fmt)
+
+
+def read(handle, fmt, strict=True):
+    """Read a motif from a handle using the specified file-format.
+
+    This supports the same formats as Bio.motifs.parse(), but
+    only for files containing exactly one motif.  For example,
+    reading a JASPAR-style pfm file:
+
+    >>> from Bio import motifs
+    >>> with open("motifs/SRF.pfm") as handle:
+    ...     m = motifs.read(handle, "pfm")
+    >>> m.consensus
+    Seq('GCCCATATATGG')
+
+    Or a single-motif MEME file,
+
+    >>> from Bio import motifs
+    >>> with open("motifs/meme.psp_test.classic.zoops.xml") as handle:
+    ...     m = motifs.read(handle, "meme")
+    >>> m.consensus
+    Seq('GCTTATGTAA')
+
+    If the handle contains no records, or more than one record,
+    an exception is raised:
+
+    >>> from Bio import motifs
+    >>> with open("motifs/alignace.out") as handle:
+    ...     motif = motifs.read(handle, "AlignAce")
+    Traceback (most recent call last):
+        ...
+    ValueError: More than one motif found in handle
+
+    If, however, you want the first motif from a file containing
+    multiple motifs, this function would raise an exception (as
+    shown in the example above).  Instead use:
+
+    >>> from Bio import motifs
+    >>> with open("motifs/alignace.out") as handle:
+    ...     record = motifs.parse(handle, "alignace")
+    >>> motif = record[0]
+    >>> motif.consensus
+    Seq('TCTACGATTGAG')
+
+    Use the Bio.motifs.parse(handle, fmt) function if you want
+    to read multiple records from the handle.
+
+    If strict is True (default), the parser will raise a ValueError if the
+    file contents do not strictly comply with the specified file format.
+    """
+    fmt = fmt.lower()
+    motifs = parse(handle, fmt, strict)
+    if len(motifs) == 0:
+        raise ValueError("No motifs found in handle")
+    if len(motifs) > 1:
+        raise ValueError("More than one motif found in handle")
+    motif = motifs[0]
+    return motif
+
+
+class Instances(list):
+    """Class containing a list of sequences that made the motifs."""
+
+    def __init__(self, instances=None, alphabet="ACGT"):
+        """Initialize the class."""
+        from Bio.Seq import Seq
+
+        length = None
+        if instances is not None:
+            sequences = []
+            for instance in instances:
+                if length is None:
+                    length = len(instance)
+                elif length != len(instance):
+                    message = (
+                        "All instances should have the same length (%d found, %d expected)"
+                        % (len(instance), length)
+                    )
+                    raise ValueError(message)
+                if not isinstance(instance, Seq):
+                    instance = Seq(str(instance))
+                sequences.append(instance)
+            # no errors were raised; store the instances:
+            self.extend(sequences)
+        self.length = length
+        self.alphabet = alphabet
+
+    def __str__(self):
+        """Return a string containing the sequences of the motif."""
+        text = ""
+        for instance in self:
+            text += str(instance) + "\n"
+        return text
+
+    def count(self):
+        """Count nucleotides in a position."""
+        counts = {}
+        for letter in self.alphabet:
+            counts[letter] = [0] * self.length
+        for instance in self:
+            for position, letter in enumerate(instance):
+                counts[letter][position] += 1
+        return counts
+
+    def search(self, sequence):
+        """Find positions of motifs in a given sequence.
+
+        This is a generator function, returning found positions of motif
+        instances in a given sequence.
+        """
+        for pos in range(0, len(sequence) - self.length + 1):
+            for instance in self:
+                if instance == sequence[pos : pos + self.length]:
+                    yield (pos, instance)
+                    break  # no other instance will fit (we don't want to return multiple hits)
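+
+    # Example of search() (illustrative sketch; the sequences are made up):
+    #
+    #     >>> inst = Instances(["ACGT", "ACGC"])
+    #     >>> list(inst.search("TTACGTTT"))
+    #     [(2, Seq('ACGT'))]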
+
+    def reverse_complement(self):
+        """Compute reverse complement of sequences."""
+        instances = Instances(alphabet=self.alphabet)
+        instances.length = self.length
+        for instance in self:
+            instance = instance.reverse_complement()
+            instances.append(instance)
+        return instances
+
+
+class Motif:
+    """A class representing sequence motifs."""
+
+    def __init__(self, alphabet="ACGT", instances=None, counts=None):
+        """Initialize the class."""
+        from . import matrix
+
+        self.name = ""
+        if counts is not None and instances is not None:
+            raise ValueError(
+                "Specify either instances or counts, don't specify both"
+            )
+        elif counts is not None:
+            self.instances = None
+            self.counts = matrix.FrequencyPositionMatrix(alphabet, counts)
+            self.length = self.counts.length
+        elif instances is not None:
+            self.instances = instances
+            alphabet = self.instances.alphabet
+            counts = self.instances.count()
+            self.counts = matrix.FrequencyPositionMatrix(alphabet, counts)
+            self.length = self.counts.length
+        else:
+            self.counts = None
+            self.instances = None
+            self.length = None
+        self.alphabet = alphabet
+        self.pseudocounts = None
+        self.background = None
+        self.mask = None
+
+    def __get_mask(self):
+        return self.__mask
+
+    def __set_mask(self, mask):
+        if self.length is None:
+            self.__mask = ()
+        elif mask is None:
+            self.__mask = (1,) * self.length
+        elif len(mask) != self.length:
+            raise ValueError(
+                "The length (%d) of the mask is inconsistent with the length "
+                "(%d) of the motif" % (len(mask), self.length)
+            )
+        elif isinstance(mask, str):
+            self.__mask = []
+            for char in mask:
+                if char == "*":
+                    self.__mask.append(1)
+                elif char == " ":
+                    self.__mask.append(0)
+                else:
+                    raise ValueError(
+                        "Mask should contain only '*' or ' ' and not a '%s'" % char
+                    )
+            self.__mask = tuple(self.__mask)
+        else:
+            self.__mask = tuple(int(bool(c)) for c in mask)
+
+    mask = property(__get_mask, __set_mask)
+    del __get_mask
+    del __set_mask
+
+    def __get_pseudocounts(self):
+        return self._pseudocounts
+
+    def __set_pseudocounts(self, value):
+        self._pseudocounts = {}
+        if isinstance(value, dict):
+            self._pseudocounts = {letter: value[letter] for letter in self.alphabet}
+        else:
+            if value is None:
+                value = 0.0
+            self._pseudocounts = dict.fromkeys(self.alphabet, value)
+
+    pseudocounts = property(__get_pseudocounts, __set_pseudocounts)
+    del __get_pseudocounts
+    del __set_pseudocounts
+
+    def __get_background(self):
+        return self._background
+
+    def __set_background(self, value):
+        if isinstance(value, dict):
+            self._background = {letter: value[letter] for letter in self.alphabet}
+        elif value is None:
+            self._background = dict.fromkeys(self.alphabet, 1.0)
+        else:
+            if sorted(self.alphabet) != ["A", "C", "G", "T"]:
+                raise ValueError(
+                    "Setting the background to a single value only works for DNA motifs"
+                    " (in which case the value is interpreted as the GC content)"
+                )
+            self._background["A"] = (1.0 - value) / 2.0
+            self._background["C"] = value / 2.0
+            self._background["G"] = value / 2.0
+            self._background["T"] = (1.0 - value) / 2.0
+        total = sum(self._background.values())
+        for letter in self.alphabet:
+            self._background[letter] /= total
+
+    background = property(__get_background, __set_background)
+    del __get_background
+    del __set_background
+
+    @property
+    def pwm(self):
+        """Compute position weight matrices."""
+        return self.counts.normalize(self._pseudocounts)
+
+    @property
+    def pssm(self):
+        """Compute position specific scoring matrices."""
+        return self.pwm.log_odds(self._background)
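+
+    # Sketch of the pwm/pssm relationship (illustrative only; assumes the
+    # uniform default background and zero pseudocounts set in __init__):
+    #
+    #     >>> m = Motif(instances=Instances(["ACGT", "ACGT"]))
+    #     >>> m.pwm["A"][0]
+    #     1.0
+    #     >>> m.pssm["A"][0]          # log2(1.0 / 0.25)
+    #     2.0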
+
+    def __str__(self, masked=False):
+        """Return string representation of a motif."""
+        text = ""
+        if self.instances is not None:
+            text += str(self.instances)
+
+        if masked:
+            for i in range(self.length):
+                if self.__mask[i]:
+                    text += "*"
+                else:
+                    text += " "
+            text += "\n"
+        return text
+
+    def __len__(self):
+        """Return the length of a motif.
+
+        Please use this method (i.e. invoke len(m)) instead of referring to m.length directly.
+        """
+        if self.length is None:
+            return 0
+        else:
+            return self.length
+
+    def reverse_complement(self):
+        """Return the reverse complement of the motif as a new motif."""
+        alphabet = self.alphabet
+        if self.instances is not None:
+            instances = self.instances.reverse_complement()
+            res = Motif(alphabet=alphabet, instances=instances)
+        else:  # has counts
+            counts = {
+                "A": self.counts["T"][::-1],
+                "C": self.counts["G"][::-1],
+                "G": self.counts["C"][::-1],
+                "T": self.counts["A"][::-1],
+            }
+            res = Motif(alphabet=alphabet, counts=counts)
+        res.__mask = self.__mask[::-1]
+        res.background = {
+            "A": self.background["T"],
+            "C": self.background["G"],
+            "G": self.background["C"],
+            "T": self.background["A"],
+        }
+        res.pseudocounts = {
+            "A": self.pseudocounts["T"],
+            "C": self.pseudocounts["G"],
+            "G": self.pseudocounts["C"],
+            "T": self.pseudocounts["A"],
+        }
+        return res
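+
+    # Sketch (illustrative only): the columns are reversed and A<->T, C<->G
+    # swapped, so for instance:
+    #
+    #     >>> m = create(["AACC"])
+    #     >>> print(m.reverse_complement().consensus)
+    #     GGTT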
+
+    @property
+    def consensus(self):
+        """Return the consensus sequence."""
+        return self.counts.consensus
+
+    @property
+    def anticonsensus(self):
+        """Return the least probable pattern to be generated from this motif."""
+        return self.counts.anticonsensus
+
+    @property
+    def degenerate_consensus(self):
+        """Return the degenerate consensus sequence.
+
+        Following the rules adapted from
+        D. R. Cavener: "Comparison of the consensus sequence flanking
+        translational start sites in Drosophila and vertebrates."
+        Nucleic Acids Research 15(4): 1353-1361. (1987).
+
+        The same rules are used by TRANSFAC.
+        """
+        return self.counts.degenerate_consensus
+
+    def weblogo(self, fname, fmt="PNG", version="2.8.2", **kwds):
+        """Download and save a weblogo using the Berkeley weblogo service.
+
+        Requires an internet connection.
+
+        The parameters from ``**kwds`` are passed directly to the weblogo server.
+
+        Currently, this method uses WebLogo version 3.3.
+        These are the arguments and their default values passed to
+        WebLogo 3.3; see their website at http://weblogo.threeplusone.com
+        for more information::
+
+            'stack_width' : 'medium',
+            'stacks_per_line' : '40',
+            'alphabet' : 'alphabet_dna',
+            'ignore_lower_case' : True,
+            'unit_name' : "bits",
+            'first_index' : '1',
+            'logo_start' : '1',
+            'logo_end': str(self.length),
+            'composition' : "comp_auto",
+            'percentCG' : '',
+            'scale_width' : True,
+            'show_errorbars' : True,
+            'logo_title' : '',
+            'logo_label' : '',
+            'show_xaxis': True,
+            'xaxis_label': '',
+            'show_yaxis': True,
+            'yaxis_label': '',
+            'yaxis_scale': 'auto',
+            'yaxis_tic_interval' : '1.0',
+            'show_ends' : True,
+            'show_fineprint' : True,
+            'color_scheme': 'color_auto',
+            'symbols0': '',
+            'symbols1': '',
+            'symbols2': '',
+            'symbols3': '',
+            'symbols4': '',
+            'color0': '',
+            'color1': '',
+            'color2': '',
+            'color3': '',
+            'color4': '',
+
+        """
+        if set(self.alphabet) == set("ACDEFGHIKLMNPQRSTVWY"):
+            alpha = "alphabet_protein"
+        elif set(self.alphabet) == set("ACGU"):
+            alpha = "alphabet_rna"
+        elif set(self.alphabet) == set("ACGT"):
+            alpha = "alphabet_dna"
+        else:
+            alpha = "auto"
+
+        frequencies = format(self, "transfac")
+        url = "http://weblogo.threeplusone.com/create.cgi"
+        values = {
+            "sequences": frequencies,
+            "format": fmt.lower(),
+            "stack_width": "medium",
+            "stacks_per_line": "40",
+            "alphabet": alpha,
+            "ignore_lower_case": True,
+            "unit_name": "bits",
+            "first_index": "1",
+            "logo_start": "1",
+            "logo_end": str(self.length),
+            "composition": "comp_auto",
+            "percentCG": "",
+            "scale_width": True,
+            "show_errorbars": True,
+            "logo_title": "",
+            "logo_label": "",
+            "show_xaxis": True,
+            "xaxis_label": "",
+            "show_yaxis": True,
+            "yaxis_label": "",
+            "yaxis_scale": "auto",
+            "yaxis_tic_interval": "1.0",
+            "show_ends": True,
+            "show_fineprint": True,
+            "color_scheme": "color_auto",
+            "symbols0": "",
+            "symbols1": "",
+            "symbols2": "",
+            "symbols3": "",
+            "symbols4": "",
+            "color0": "",
+            "color1": "",
+            "color2": "",
+            "color3": "",
+            "color4": "",
+        }
+
+        values.update({k: "" if v is False else str(v) for k, v in kwds.items()})
+        data = urlencode(values).encode("utf-8")
+        req = Request(url, data)
+        response = urlopen(req)
+        with open(fname, "wb") as f:
+            im = response.read()
+            f.write(im)
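+
+    # Usage sketch (illustrative only; requires network access, and the
+    # output file names are arbitrary):
+    #
+    #     >>> m.weblogo("logo.png")             # doctest: +SKIP
+    #     >>> m.weblogo("logo.pdf", fmt="PDF")  # doctest: +SKIP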
+
+    def __format__(self, format_spec):
+        """Return a string representation of the Motif in the given format.
+
+        Currently supported formats:
+         - clusterbuster: Cluster Buster position frequency matrix format
+         - pfm : JASPAR single Position Frequency Matrix
+         - jaspar : JASPAR multiple Position Frequency Matrix
+         - transfac : TRANSFAC like files
+
+        """
+        return self.format(format_spec)
+
+    def format(self, format_spec):
+        """Return a string representation of the Motif in the given format.
+
+        Currently supported formats:
+         - clusterbuster: Cluster Buster position frequency matrix format
+         - pfm : JASPAR single Position Frequency Matrix
+         - jaspar : JASPAR multiple Position Frequency Matrix
+         - transfac : TRANSFAC like files
+
+        """
+        if format_spec in ("pfm", "jaspar"):
+            from Bio.motifs import jaspar
+
+            motifs = [self]
+            return jaspar.write(motifs, format_spec)
+        elif format_spec == "transfac":
+            from Bio.motifs import transfac
+
+            motifs = [self]
+            return transfac.write(motifs)
+        elif format_spec == "clusterbuster":
+            from Bio.motifs import clusterbuster
+
+            motifs = [self]
+            return clusterbuster.write(motifs)
+        else:
+            raise ValueError("Unknown format type %s" % format_spec)
+
+
+def write(motifs, fmt):
+    """Return a string representation of motifs in the given format.
+
+    Currently supported formats (case is ignored):
+     - clusterbuster: Cluster Buster position frequency matrix format
+     - pfm : JASPAR simple single Position Frequency Matrix
+     - jaspar : JASPAR multiple PFM format
+     - transfac : TRANSFAC like files
+
+    """
+    fmt = fmt.lower()
+    if fmt in ("pfm", "jaspar"):
+        from Bio.motifs import jaspar
+
+        return jaspar.write(motifs, fmt)
+    elif fmt == "transfac":
+        from Bio.motifs import transfac
+
+        return transfac.write(motifs)
+    elif fmt == "clusterbuster":
+        from Bio.motifs import clusterbuster
+
+        return clusterbuster.write(motifs)
+    else:
+        raise ValueError("Unknown format type %s" % fmt)
+
+
+if __name__ == "__main__":
+    from Bio._utils import run_doctest
+
+    run_doctest(verbose=0)
diff --git a/code/lib/Bio/motifs/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/motifs/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000..e06607d
Binary files /dev/null and b/code/lib/Bio/motifs/__pycache__/__init__.cpython-37.pyc differ
diff --git a/code/lib/Bio/motifs/__pycache__/alignace.cpython-37.pyc b/code/lib/Bio/motifs/__pycache__/alignace.cpython-37.pyc
new file mode 100644
index 0000000..71c1800
Binary files /dev/null and b/code/lib/Bio/motifs/__pycache__/alignace.cpython-37.pyc differ
diff --git a/code/lib/Bio/motifs/__pycache__/clusterbuster.cpython-37.pyc b/code/lib/Bio/motifs/__pycache__/clusterbuster.cpython-37.pyc
new file mode 100644
index 0000000..6d0cd44
Binary files /dev/null and b/code/lib/Bio/motifs/__pycache__/clusterbuster.cpython-37.pyc differ
diff --git a/code/lib/Bio/motifs/__pycache__/mast.cpython-37.pyc b/code/lib/Bio/motifs/__pycache__/mast.cpython-37.pyc
new file mode 100644
index 0000000..4b661e5
Binary files /dev/null and b/code/lib/Bio/motifs/__pycache__/mast.cpython-37.pyc differ
diff --git a/code/lib/Bio/motifs/__pycache__/matrix.cpython-37.pyc b/code/lib/Bio/motifs/__pycache__/matrix.cpython-37.pyc
new file mode 100644
index 0000000..71ef1d4
Binary files /dev/null and b/code/lib/Bio/motifs/__pycache__/matrix.cpython-37.pyc differ
diff --git a/code/lib/Bio/motifs/__pycache__/meme.cpython-37.pyc b/code/lib/Bio/motifs/__pycache__/meme.cpython-37.pyc
new file mode 100644
index 0000000..40aae17
Binary files /dev/null and b/code/lib/Bio/motifs/__pycache__/meme.cpython-37.pyc differ
diff --git a/code/lib/Bio/motifs/__pycache__/minimal.cpython-37.pyc b/code/lib/Bio/motifs/__pycache__/minimal.cpython-37.pyc
new file mode 100644
index 0000000..b85407c
Binary files /dev/null and b/code/lib/Bio/motifs/__pycache__/minimal.cpython-37.pyc differ
diff --git a/code/lib/Bio/motifs/__pycache__/pfm.cpython-37.pyc b/code/lib/Bio/motifs/__pycache__/pfm.cpython-37.pyc
new file mode 100644
index 0000000..868de47
Binary files /dev/null and b/code/lib/Bio/motifs/__pycache__/pfm.cpython-37.pyc differ
diff --git a/code/lib/Bio/motifs/__pycache__/thresholds.cpython-37.pyc b/code/lib/Bio/motifs/__pycache__/thresholds.cpython-37.pyc
new file mode 100644
index 0000000..f4dbf69
Binary files /dev/null and b/code/lib/Bio/motifs/__pycache__/thresholds.cpython-37.pyc differ
diff --git a/code/lib/Bio/motifs/__pycache__/transfac.cpython-37.pyc b/code/lib/Bio/motifs/__pycache__/transfac.cpython-37.pyc
new file mode 100644
index 0000000..d9a3fc9
Binary files /dev/null and b/code/lib/Bio/motifs/__pycache__/transfac.cpython-37.pyc differ
diff --git a/code/lib/Bio/motifs/__pycache__/xms.cpython-37.pyc b/code/lib/Bio/motifs/__pycache__/xms.cpython-37.pyc
new file mode 100644
index 0000000..c680baa
Binary files /dev/null and b/code/lib/Bio/motifs/__pycache__/xms.cpython-37.pyc differ
diff --git a/code/lib/Bio/motifs/_pwm.c b/code/lib/Bio/motifs/_pwm.c
new file mode 100644
index 0000000..e29123b
--- /dev/null
+++ b/code/lib/Bio/motifs/_pwm.c
@@ -0,0 +1,216 @@
+/* Copyright 2009-2020 by Michiel de Hoon.  All rights reserved.
+ *
+ * This file is part of the Biopython distribution and governed by your
+ * choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+ * Please see the LICENSE file that should have been included as part of this
+ * package.
+ */
+
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+#include <math.h>
+
+
+static void
+calculate(const char sequence[], int s, Py_ssize_t m, double* matrix,
+          Py_ssize_t n, float* scores)
+{
+    Py_ssize_t i, j;
+    char c;
+    double score;
+    int ok;
+    float* p = scores;
+#ifndef NAN
+    float NAN = 0.0;
+    NAN /= NAN;
+#endif
+
+    for (i = 0; i < n; i++)
+    {
+        score = 0.0;
+        ok = 1;
+        for (j = 0; j < m; j++)
+        {
+            c = sequence[i+j];
+            switch (c)
+            {
+              /* Handling mixed case input here rather than converting it to
+                 uppercase in Python code first, since doing so could use too
+                 much memory if sequence is too long (e.g. chromosome or
+                 plasmid). */
+                case 'A':
+                case 'a':
+                    score += matrix[j*4+0]; break;
+                case 'C':
+                case 'c':
+                    score += matrix[j*4+1]; break;
+                case 'G':
+                case 'g':
+                    score += matrix[j*4+2]; break;
+                case 'T':
+                case 't':
+                    score += matrix[j*4+3]; break;
+                default:
+                    ok = 0;
+            }
+        }
+        if (ok) *p = (float)score;
+        else *p = NAN;
+        p++;
+    }
+}
+
+static int
+matrix_converter(PyObject* object, void* address)
+{
+    const int flags = PyBUF_C_CONTIGUOUS | PyBUF_FORMAT;
+    char datatype;
+    Py_buffer* view = address;
+
+    if (object == NULL) goto exit;
+    if (PyObject_GetBuffer(object, view, flags) == -1) {
+        PyErr_SetString(PyExc_RuntimeError,
+                        "position-weight matrix is not an array");
+        return 0;
+    }
+    datatype = view->format[0];
+    switch (datatype) {
+        case '@':
+        case '=':
+        case '<':
+        case '>':
+        case '!': datatype = view->format[1]; break;
+        default: break;
+    }
+    if (datatype != 'd') {
+        PyErr_Format(PyExc_RuntimeError,
+            "position-weight matrix data format incorrect "
+            "('%c', expected 'd')", datatype);
+        goto exit;
+    }
+    if (view->ndim != 2) {
+        PyErr_Format(PyExc_RuntimeError,
+            "position-weight matrix has incorrect rank (%d expected 2)",
+            view->ndim);
+        goto exit;
+    }
+    if (view->shape[1] != 4) {
+        PyErr_Format(PyExc_RuntimeError,
+            "position-weight matrix should have four columns "
+            "(%zd columns found)", view->shape[1]);
+        goto exit;
+    }
+    return Py_CLEANUP_SUPPORTED;
+
+exit:
+    PyBuffer_Release(view);
+    return 0;
+}
+
+static int
+scores_converter(PyObject* object, void* address)
+{
+    const int flags = PyBUF_C_CONTIGUOUS | PyBUF_FORMAT;
+    char datatype;
+    Py_buffer* view = address;
+
+    if (object == NULL) goto exit;
+    if (PyObject_GetBuffer(object, view, flags) == -1) return 0;
+    datatype = view->format[0];
+    switch (datatype) {
+        case '@':
+        case '=':
+        case '<':
+        case '>':
+        case '!': datatype = view->format[1]; break;
+        default: break;
+    }
+    if (datatype != 'f') {
+        PyErr_Format(PyExc_RuntimeError,
+            "scores array has incorrect data format ('%c', expected 'f')",
+            datatype);
+        goto exit;
+    }
+    if (view->ndim != 1) {
+        PyErr_Format(PyExc_ValueError,
+            "scores array has incorrect rank (%d expected 1)",
+            view->ndim);
+        goto exit;
+    }
+    return Py_CLEANUP_SUPPORTED;
+
+exit:
+    PyBuffer_Release(view);
+    return 0;
+}
+
+static char calculate__doc__[] =
+"    calculate(sequence, pwm, scores)\n"
+"\n"
+"This function calculates the position-weight matrix scores for all\n"
+"positions along the sequence for position-weight matrix pwm, and stores\n"
+"them in the provided numpy array scores.\n";
+
+static PyObject*
+py_calculate(PyObject* self, PyObject* args, PyObject* keywords)
+{
+    const char* sequence;
+    static char* kwlist[] = {"sequence", "matrix", "scores", NULL};
+    Py_ssize_t m;
+    Py_ssize_t n;
+    Py_ssize_t s;
+    PyObject* result = NULL;
+    Py_buffer scores;
+    Py_buffer matrix;
+
+    matrix.obj = NULL;
+    scores.obj = NULL;
+    if (!PyArg_ParseTupleAndKeywords(args, keywords, "y#O&O&", kwlist,
+                                     &sequence, &s,
+                                     matrix_converter, &matrix,
+                                     scores_converter, &scores)) return NULL;
+    m = matrix.shape[0];
+    n = scores.shape[0];
+    if (n == s - m + 1) {
+        calculate(sequence, s, m, matrix.buf, n, scores.buf);
+        Py_INCREF(Py_None);
+        result = Py_None;
+    }
+    else {
+        PyErr_Format(PyExc_RuntimeError,
+                    "size of scores array is inconsistent "
+                    "(sequence length is %zd, "
+                    "motif length is %zd, scores length is %zd", s, m, n);
+    }
+
+    matrix_converter(NULL, &matrix);
+    scores_converter(NULL, &scores);
+    return result;
+}
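+
+/* Worked example (illustrative sketch): for a sequence of length s=10 and a
+ * position-weight matrix with m=3 rows, the scores buffer must hold exactly
+ * n = s - m + 1 = 8 floats, one score per alignment offset. */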
+
+static struct PyMethodDef methods[] = {
+   {"calculate",
+    (PyCFunction)py_calculate,
+    METH_VARARGS | METH_KEYWORDS,
+    PyDoc_STR(calculate__doc__),
+   },
+   {NULL, NULL, 0, NULL} // sentinel
+};
+
+static struct PyModuleDef moduledef = {
+    PyModuleDef_HEAD_INIT,
+    "_pwm",
+    PyDoc_STR("Fast calculations involving position-weight matrices"),
+    -1,
+    methods,
+    NULL,
+    NULL,
+    NULL,
+    NULL
+};
+
+PyObject*
+PyInit__pwm(void)
+{
+    return PyModule_Create(&moduledef);
+}
diff --git a/code/lib/Bio/motifs/_pwm.cp37-win_amd64.pyd b/code/lib/Bio/motifs/_pwm.cp37-win_amd64.pyd
new file mode 100644
index 0000000..89817dc
Binary files /dev/null and b/code/lib/Bio/motifs/_pwm.cp37-win_amd64.pyd differ
diff --git a/code/lib/Bio/motifs/alignace.py b/code/lib/Bio/motifs/alignace.py
new file mode 100644
index 0000000..42d5e0a
--- /dev/null
+++ b/code/lib/Bio/motifs/alignace.py
@@ -0,0 +1,67 @@
+# Copyright 2003 by Bartek Wilczynski.  All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+
+"""Parsing AlignACE output files."""
+
+from Bio.motifs import Motif, Instances
+from Bio.Seq import Seq
+
+
+class Record(list):
+    """AlignACE record (subclass of Python list)."""
+
+    def __init__(self):
+        """Initialize the class."""
+        self.parameters = None
+
+
+def read(handle):
+    """Parse an AlignACE format handle as a Record object."""
+    record = Record()
+    line = next(handle)
+    record.version = line.strip()
+    line = next(handle)
+    record.command = line.strip()
+    mask = None
+    number = None
+    for line in handle:
+        line = line.strip()
+        if line == "":
+            pass
+        elif line[:4] == "Para":
+            record.parameters = {}
+        elif line[0] == "#":
+            seq_name = line.split("\t")[1]
+            record.sequences.append(seq_name)
+        elif "=" in line:
+            par_name, par_value = line.split("=")
+            par_name = par_name.strip()
+            par_value = par_value.strip()
+            record.parameters[par_name] = par_value
+        elif line[:5] == "Input":
+            record.sequences = []
+        elif line[:5] == "Motif":
+            words = line.split()
+            assert words[0] == "Motif"
+            number = int(words[1])
+            instances = []
+        elif line[:3] == "MAP":
+            alphabet = "ACGT"
+            instances = Instances(instances, alphabet)
+            motif = Motif(alphabet, instances)
+            motif.score = float(line.split()[-1])
+            motif.number = number
+            motif.mask = mask
+            record.append(motif)
+        elif len(line.split("\t")) == 4:
+            seq = Seq(line.split("\t")[0])
+            instances.append(seq)
+        elif "*" in line:
+            mask = line.strip("\r\n")
+        else:
+            raise ValueError(line)
+    return record
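+
+# Usage sketch (illustrative only; "alignace.out" is a hypothetical file
+# name):
+#
+#     >>> with open("alignace.out") as handle:   # doctest: +SKIP
+#     ...     record = read(handle)
+#     >>> record[0].consensus                    # doctest: +SKIP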
diff --git a/code/lib/Bio/motifs/applications/__init__.py b/code/lib/Bio/motifs/applications/__init__.py
new file mode 100644
index 0000000..0832554
--- /dev/null
+++ b/code/lib/Bio/motifs/applications/__init__.py
@@ -0,0 +1,15 @@
+# Copyright 2009 by Bartek Wilczynski.  All rights reserved.
+# Revisions copyright 2009 by Peter Cock.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+
+"""Motif command line tool wrappers (OBSOLETE).
+
+We have decided to remove this module in future, and instead recommend
+building your command and invoking it via the subprocess module directly.
+"""
+
+from ._xxmotif import XXmotifCommandline
diff --git a/code/lib/Bio/motifs/applications/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/motifs/applications/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000..17fcc25
Binary files /dev/null and b/code/lib/Bio/motifs/applications/__pycache__/__init__.cpython-37.pyc differ
diff --git a/code/lib/Bio/motifs/applications/__pycache__/_xxmotif.cpython-37.pyc b/code/lib/Bio/motifs/applications/__pycache__/_xxmotif.cpython-37.pyc
new file mode 100644
index 0000000..9b94dad
Binary files /dev/null and b/code/lib/Bio/motifs/applications/__pycache__/_xxmotif.cpython-37.pyc differ
diff --git a/code/lib/Bio/motifs/applications/_xxmotif.py b/code/lib/Bio/motifs/applications/_xxmotif.py
new file mode 100644
index 0000000..6ff8cf0
--- /dev/null
+++ b/code/lib/Bio/motifs/applications/_xxmotif.py
@@ -0,0 +1,261 @@
+# Copyright 2012 by Christian Brueffer.  All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+
+"""Command line wrapper for the motif finding program XXmotif."""
+
+
+import os
+from Bio.Application import AbstractCommandline, _Option, _Switch, _Argument
+
+
+class XXmotifCommandline(AbstractCommandline):
+    """Command line wrapper for XXmotif.
+
+    http://xxmotif.genzentrum.lmu.de/
+
+    Notes
+    -----
+    Last checked against version: 1.3
+
+    References
+    ----------
+    Luehr S, Hartmann H, and Söding J. The XXmotif web server for eXhaustive,
+    weight matriX-based motif discovery in nucleotide sequences,
+    Nucleic Acids Res. 40: W104-W109 (2012).
+
+    Hartmann H, Guthoehrlein EW, Siebert M., Luehr S, and Söding J. P-value
+    based regulatory motif discovery using positional weight matrices,
+    Genome Res. 23: 181–194 (2013)
+
+    Examples
+    --------
+    >>> from Bio.motifs.applications import XXmotifCommandline
+    >>> out_dir = "results"
+    >>> in_file = "sequences.fasta"
+    >>> xxmotif_cline = XXmotifCommandline(outdir=out_dir, seqfile=in_file, revcomp=True)
+    >>> print(xxmotif_cline)
+    XXmotif results sequences.fasta --revcomp
+
+    You would typically run the command line with xxmotif_cline() or via
+    the Python subprocess module, as described in the Biopython tutorial.
+
+    """
+
+    def __init__(self, cmd="XXmotif", **kwargs):
+        """Initialize the class."""
+        # order of parameters is the same as in XXmotif --help
+        _valid_alphabet = set("ACGTNX")
+
+        self.parameters = [
+            _Argument(
+                ["outdir", "OUTDIR"],
+                "output directory for all results",
+                filename=True,
+                is_required=True,
+                # XXmotif currently does not accept spaces in the outdir name
+                checker_function=lambda x: " " not in x,
+            ),
+            _Argument(
+                ["seqfile", "SEQFILE"],
+                "file name with sequences from positive set in FASTA format",
+                filename=True,
+                is_required=True,
+                # XXmotif currently only accepts a pure filename
+                checker_function=lambda x: os.path.split(x)[0] == "",
+            ),
+            # Options
+            _Option(
+                ["--negSet", "negSet", "NEGSET", "negset"],
+                "sequence set which has to be used as a reference set",
+                filename=True,
+                equate=False,
+            ),
+            _Switch(
+                ["--zoops", "ZOOPS", "zoops"],
+                "use zero-or-one occurrence per sequence model (DEFAULT)",
+            ),
+            _Switch(
+                ["--mops", "MOPS", "mops"], "use multiple occurrence per sequence model"
+            ),
+            _Switch(
+                ["--oops", "OOPS", "oops"], "use one occurrence per sequence model"
+            ),
+            _Switch(
+                ["--revcomp", "REVCOMP", "revcomp"],
+                "search in reverse complement of sequences as well (DEFAULT: NO)",
+            ),
+            _Option(
+                [
+                    "--background-model-order",
+                    "background-model-order",
+                    "BACKGROUND-MODEL-ORDER",
+                    "background_model_order",
+                ],
+                "order of background distribution (DEFAULT: 2, 8(--negset) )",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Option(
+                ["--pseudo", "PSEUDO", "pseudo"],
+                "percentage of pseudocounts used (DEFAULT: 10)",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Option(
+                ["-g", "--gaps", "GAPS", "gaps"],
+                "maximum number of gaps used for start seeds [0-3] (DEFAULT: 0)",
+                checker_function=lambda x: x in range(4),
+                equate=False,
+            ),
+            _Option(
+                ["--type", "TYPE", "type"],
+                "defines what kind of start seeds are used (DEFAULT: ALL)"
+                "possible types: ALL, FIVEMERS, PALINDROME, TANDEM, NOPALINDROME, NOTANDEM",
+                checker_function=lambda x: x
+                in [
+                    "ALL",
+                    "all",
+                    "FIVEMERS",
+                    "fivemers",
+                    "PALINDROME",
+                    "palindrome",
+                    "TANDEM",
+                    "tandem",
+                    "NOPALINDROME",
+                    "nopalindrome",
+                    "NOTANDEM",
+                    "notandem",
+                ],
+                equate=False,
+            ),
+            _Option(
+                [
+                    "--merge-motif-threshold",
+                    "merge-motif-threshold",
+                    "MERGE-MOTIF-THRESHOLD",
+                    "merge_motif_threshold",
+                ],
+                "defines the similarity threshold for merging motifs (DEFAULT: HIGH)"
+                "possible modes: LOW, MEDIUM, HIGH",
+                checker_function=lambda x: x
+                in ["LOW", "low", "MEDIUM", "medium", "HIGH", "high"],
+                equate=False,
+            ),
+            _Switch(
+                [
+                    "--no-pwm-length-optimization",
+                    "no-pwm-length-optimization",
+                    "NO-PWM-LENGTH-OPTIMIZATION",
+                    "no_pwm_length_optimization",
+                ],
+                "do not optimize length during iterations (runtime advantages)",
+            ),
+            _Option(
+                [
+                    "--max-match-positions",
+                    "max-match-positions",
+                    "MAX-MATCH-POSITIONS",
+                    "max_match_positions",
+                ],
+                "max number of positions per motif (DEFAULT: 17, higher values will lead to very long runtimes)",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Switch(
+                ["--batch", "BATCH", "batch"],
+                "suppress progress bars (reduce output size for batch jobs)",
+            ),
+            _Option(
+                ["--maxPosSetSize", "maxPosSetSize", "MAXPOSSETSIZE", "maxpossetsize"],
+                "maximum number of sequences from the positive set used [DEFAULT: all]",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            # does not make sense in biopython
+            # _Switch(["--help", "help", "HELP"],
+            #         "print this help page"),
+            _Option(
+                ["--trackedMotif", "trackedMotif", "TRACKEDMOTIF", "trackedmotif"],
+                "inspect extensions and refinement of a given seed (DEFAULT: not used)",
+                checker_function=lambda x: any((c in _valid_alphabet) for c in x),
+                equate=False,
+            ),
+            # Using conservation information
+            _Option(
+                ["--format", "FORMAT", "format"],
+                "defines what kind of format the input sequences have (DEFAULT: FASTA)",
+                checker_function=lambda x: x in ["FASTA", "fasta", "MFASTA", "mfasta"],
+                equate=False,
+            ),
+            _Option(
+                [
+                    "--maxMultipleSequences",
+                    "maxMultipleSequences",
+                    "MAXMULTIPLESEQUENCES",
+                    "maxmultiplesequences",
+                ],
+                "maximum number of sequences used in an alignment [DEFAULT: all]",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            # Using localization information
+            _Switch(
+                ["--localization", "LOCALIZATION", "localization"],
+                "use localization information to calculate combined P-values"
+                "(sequences should have all the same length)",
+            ),
+            _Option(
+                ["--downstream", "DOWNSTREAM", "downstream"],
+                "number of residues in positive set downstream of anchor point (DEFAULT: 0)",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            # Start with self defined motif
+            _Option(
+                ["-m", "--startMotif", "startMotif", "STARTMOTIF", "startmotif"],
+                "Start motif (IUPAC characters)",
+                checker_function=lambda x: any((c in _valid_alphabet) for c in x),
+                equate=False,
+            ),
+            _Option(
+                ["-p", "--profileFile", "profileFile", "PROFILEFILE", "profilefile"],
+                "profile file",
+                filename=True,
+                equate=False,
+            ),
+            _Option(
+                ["--startRegion", "startRegion", "STARTREGION", "startregion"],
+                "expected start position for motif occurrences relative to anchor point (--localization)",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            _Option(
+                ["--endRegion", "endRegion", "ENDREGION", "endregion"],
+                "expected end position for motif occurrences relative to anchor point (--localization)",
+                checker_function=lambda x: isinstance(x, int),
+                equate=False,
+            ),
+            # XXmotif wrapper options
+            _Switch(
+                ["--XXmasker", "masker"],
+                "mask the input sequences for homology, repeats and low complexity regions",
+            ),
+            _Switch(
+                ["--XXmasker-pos", "maskerpos"],
+                "mask only the positive set for homology, repeats and low complexity regions",
+            ),
+            _Switch(
+                ["--no-graphics", "nographics"], "run XXmotif without graphical output"
+            ),
+        ]
+        AbstractCommandline.__init__(self, cmd, **kwargs)
+
+
+if __name__ == "__main__":
+    from Bio._utils import run_doctest
+
+    run_doctest()
diff --git a/code/lib/Bio/motifs/clusterbuster.py b/code/lib/Bio/motifs/clusterbuster.py
new file mode 100644
index 0000000..8340fea
--- /dev/null
+++ b/code/lib/Bio/motifs/clusterbuster.py
@@ -0,0 +1,80 @@
+# Copyright 2015 by Gert Hulselmans.  All rights reserved.
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+
+"""Parse Cluster Buster position frequency matrix files."""
+
+from Bio import motifs
+
+
+class Record(list):
+    """Class to store the information in a Cluster Buster matrix table.
+
+    The record inherits from a list containing the individual motifs.
+    """
+
+    def __str__(self):
+        return "\n".join(str(motif) for motif in self)
+
+
+def read(handle):
+    """Read motifs in Cluster Buster position frequency matrix format from a file handle.
+
+    Cluster Buster motif format: http://zlab.bu.edu/cluster-buster/help/cis-format.html
+    """
+    motif_nbr = 0
+    record = Record()
+    nucleotide_counts = {"A": [], "C": [], "G": [], "T": []}
+    motif_name = ""
+
+    for line in handle:
+        line = line.strip()
+        if line:
+            if line.startswith(">"):
+                if motif_nbr != 0:
+                    motif = motifs.Motif(alphabet="GATC", counts=nucleotide_counts)
+                    motif.name = motif_name
+                    record.append(motif)
+
+                motif_name = line[1:].strip()
+                nucleotide_counts = {"A": [], "C": [], "G": [], "T": []}
+                motif_nbr += 1
+            else:
+                if line.startswith("#"):
+                    continue
+
+                matrix_columns = line.split()
+
+                if len(matrix_columns) == 4:
+                    for nucleotide, nucleotide_count in zip(
+                        ["A", "C", "G", "T"], matrix_columns
+                    ):
+                        nucleotide_counts[nucleotide].append(float(nucleotide_count))
+
+    # Append the final motif; skip if the file contained no motifs at all.
+    if motif_nbr != 0:
+        motif = motifs.Motif(alphabet="GATC", counts=nucleotide_counts)
+        motif.name = motif_name
+        record.append(motif)
+
+    return record
+
+
+def write(motifs):
+    """Return the representation of motifs in Cluster Buster position frequency matrix format."""
+    lines = []
+    for m in motifs:
+        line = f">{m.name}\n"
+        lines.append(line)
+        for ACGT_counts in zip(
+            m.counts["A"], m.counts["C"], m.counts["G"], m.counts["T"]
+        ):
+            lines.append("{:0.0f}\t{:0.0f}\t{:0.0f}\t{:0.0f}\n".format(*ACGT_counts))
+
+    # Finished; glue the lines together.
+    text = "".join(lines)
+
+    return text
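+
+# Round-trip sketch (illustrative only; "motifs.cb" is a hypothetical file
+# name):
+#
+#     >>> with open("motifs.cb") as handle:      # doctest: +SKIP
+#     ...     record = read(handle)
+#     >>> print(write(record))                   # doctest: +SKIP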
diff --git a/code/lib/Bio/motifs/jaspar/__init__.py b/code/lib/Bio/motifs/jaspar/__init__.py
new file mode 100644
index 0000000..fc09b1e
--- /dev/null
+++ b/code/lib/Bio/motifs/jaspar/__init__.py
@@ -0,0 +1,372 @@
+# Copyright 2013 by Anthony Mathelier and David Arenillas. All rights reserved.
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+
+"""JASPAR2014 module."""
+
+from Bio.Seq import Seq
+import re
+import math
+
+from Bio import motifs
+
+
+class Motif(motifs.Motif):
+    """A subclass of Bio.motifs.Motif used to represent a JASPAR profile.
+
+    Additional metadata information are stored if available. The metadata
+    availability depends on the source of the JASPAR motif (a 'pfm' format
+    file, a 'jaspar' format file or a JASPAR database).
+    """
+
+    def __init__(
+        self,
+        matrix_id,
+        name,
+        alphabet="ACGT",
+        instances=None,
+        counts=None,
+        collection=None,
+        tf_class=None,
+        tf_family=None,
+        species=None,
+        tax_group=None,
+        acc=None,
+        data_type=None,
+        medline=None,
+        pazar_id=None,
+        comment=None,
+    ):
+        """Construct a JASPAR Motif instance."""
+        motifs.Motif.__init__(self, alphabet, instances, counts)
+        self.name = name
+        self.matrix_id = matrix_id
+        self.collection = collection
+        self.tf_class = tf_class
+        self.tf_family = tf_family
+        # May have multiple so species is a list.
+        # The species are actually specified as
+        # taxonomy IDs.
+        self.species = species
+        self.tax_group = tax_group
+        self.acc = acc  # May have multiple so acc is a list.
+        self.data_type = data_type
+        self.medline = medline
+        self.pazar_id = pazar_id
+        self.comment = comment
+
+    @property
+    def base_id(self):
+        """Return the JASPAR base matrix ID."""
+        (base_id, __) = split_jaspar_id(self.matrix_id)
+        return base_id
+
+    @property
+    def version(self):
+        """Return the JASPAR matrix version."""
+        (__, version) = split_jaspar_id(self.matrix_id)
+        return version
+
+    def __str__(self):
+        """Return a string represention of the JASPAR profile.
+
+        We choose to provide only the filled metadata information.
+        """
+        tf_name_str = f"TF name\t{self.name}\n"
+        matrix_id_str = f"Matrix ID\t{self.matrix_id}\n"
+        the_string = "".join([tf_name_str, matrix_id_str])
+        if self.collection:
+            collection_str = f"Collection\t{self.collection}\n"
+            the_string = "".join([the_string, collection_str])
+        if self.tf_class:
+            tf_class_str = f"TF class\t{self.tf_class}\n"
+            the_string = "".join([the_string, tf_class_str])
+        if self.tf_family:
+            tf_family_str = f"TF family\t{self.tf_family}\n"
+            the_string = "".join([the_string, tf_family_str])
+        if self.species:
+            species_str = f"Species\t{','.join(self.species)}\n"
+            the_string = "".join([the_string, species_str])
+        if self.tax_group:
+            tax_group_str = f"Taxonomic group\t{self.tax_group}\n"
+            the_string = "".join([the_string, tax_group_str])
+        if self.acc:
+            acc_str = f"Accession\t{self.acc}\n"
+            the_string = "".join([the_string, acc_str])
+        if self.data_type:
+            data_type_str = f"Data type used\t{self.data_type}\n"
+            the_string = "".join([the_string, data_type_str])
+        if self.medline:
+            medline_str = f"Medline\t{self.medline}\n"
+            the_string = "".join([the_string, medline_str])
+        if self.pazar_id:
+            pazar_id_str = f"PAZAR ID\t{self.pazar_id}\n"
+            the_string = "".join([the_string, pazar_id_str])
+        if self.comment:
+            comment_str = f"Comments\t{self.comment}\n"
+            the_string = "".join([the_string, comment_str])
+        matrix_str = f"Matrix:\n{self.counts}\n\n"
+        the_string = "".join([the_string, matrix_str])
+        return the_string
+
+    def __hash__(self):
+        """Return the hash key corresponding to the JASPAR profile.
+
+        :note: We assume matrix IDs are unique.
+
+        """
+        return self.matrix_id.__hash__()
+
+    def __eq__(self, other):
+        """Return True if matrix IDs are the same."""
+        return self.matrix_id == other.matrix_id
+
+
+class Record(list):
+    """Represent a list of jaspar motifs.
+
+    Attributes:
+     - version: The JASPAR version used
+
+    """
+
+    def __init__(self):
+        """Initialize the class."""
+        self.version = None
+
+    def __str__(self):
+        """Return a string of all motifs in the Record."""
+        return "\n".join(str(the_motif) for the_motif in self)
+
+    def to_dict(self):
+        """Return the list of matrices as a dictionary of matrices."""
+        dic = {}
+        for motif in self:
+            dic[motif.matrix_id] = motif
+        return dic
+
+
+def read(handle, format):
+    """Read motif(s) from a file in one of several different JASPAR formats.
+
+    Return the record of PFM(s).
+    Call the appropriate routine based on the format passed.
+    """
+    format = format.lower()
+    if format == "pfm":
+        record = _read_pfm(handle)
+        return record
+    elif format == "sites":
+        record = _read_sites(handle)
+        return record
+    elif format == "jaspar":
+        record = _read_jaspar(handle)
+        return record
+    else:
+        raise ValueError("Unknown JASPAR format %s" % format)
+
+
+def write(motifs, format):
+    """Return the representation of motifs in "pfm" or "jaspar" format."""
+    letters = "ACGT"
+    lines = []
+    if format == "pfm":
+        motif = motifs[0]
+        counts = motif.counts
+        for letter in letters:
+            terms = [f"{value:6.2f}" for value in counts[letter]]
+            line = f"{' '.join(terms)}\n"
+            lines.append(line)
+    elif format == "jaspar":
+        for m in motifs:
+            counts = m.counts
+            try:
+                matrix_id = m.matrix_id
+            except AttributeError:
+                matrix_id = None
+            line = f">{matrix_id} {m.name}\n"
+            lines.append(line)
+            for letter in letters:
+                terms = [f"{value:6.2f}" for value in counts[letter]]
+                line = f"{letter} [{' '.join(terms)}]\n"
+                lines.append(line)
+    else:
+        raise ValueError("Unknown JASPAR format %s" % format)
+
+    # Finished; glue the lines together
+    text = "".join(lines)
+
+    return text
+
+
+def _read_pfm(handle):
+    """Read the motif from a JASPAR .pfm file (PRIVATE)."""
+    alphabet = "ACGT"
+    counts = {}
+
+    for letter, line in zip(alphabet, handle):
+        words = line.split()
+        # if the line starts with the nucleotide letter, skip it
+        if words[0] == letter:
+            words = words[1:]
+        counts[letter] = [float(x) for x in words]
+
+    motif = Motif(matrix_id=None, name=None, alphabet=alphabet, counts=counts)
+    motif.mask = "*" * motif.length
+    record = Record()
+    record.append(motif)
+
+    return record
+
+
+def _read_sites(handle):
+    """Read the motif from JASPAR .sites file (PRIVATE)."""
+    alphabet = "ACGT"
+    instances = []
+
+    for line in handle:
+        if not line.startswith(">"):
+            break
+        # line contains the header ">...."
+        # now read the actual sequence
+        line = next(handle)
+        instance = ""
+        for c in line.strip():
+            if c == c.upper():
+                instance += c
+        instance = Seq(instance)
+        instances.append(instance)
+
+    instances = motifs.Instances(instances, alphabet)
+    motif = Motif(matrix_id=None, name=None, alphabet=alphabet, instances=instances)
+    motif.mask = "*" * motif.length
+    record = Record()
+    record.append(motif)
+
+    return record
+
+
+def _read_jaspar(handle):
+    """Read motifs from a JASPAR formatted file (PRIVATE).
+
+    Format is one or more records of the form, e.g.::
+
+      - JASPAR 2010 matrix_only format::
+
+                >MA0001.1 AGL3
+                A  [ 0  3 79 40 66 48 65 11 65  0 ]
+                C  [94 75  4  3  1  2  5  2  3  3 ]
+                G  [ 1  0  3  4  1  0  5  3 28 88 ]
+                T  [ 2 19 11 50 29 47 22 81  1  6 ]
+
+      - JASPAR 2010-2014 PFMs format::
+
+                >MA0001.1 AGL3
+                0	3	79	40	66	48	65	11	65	0
+                94	75	4	3	1	2	5	2	3	3
+                1	0	3	4	1	0	5	3	28	88
+                2	19	11	50	29	47	22	81	1	6
+
+    """
+    alphabet = "ACGT"
+    counts = {}
+
+    record = Record()
+
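+    # head_pat matches header lines such as ">MA0001.1 AGL3" (group 1 is the
+    # matrix ID, group 3 the optional TF name); row_pat_long matches bracketed
+    # count rows such as "A  [ 0  3 79 ]"; row_pat_short matches bare
+    # whitespace-separated count rows.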
+    head_pat = re.compile(r"^>\s*(\S+)(\s+(\S+))?")
+    row_pat_long = re.compile(r"\s*([ACGT])\s*\[\s*(.*)\s*\]")
+    row_pat_short = re.compile(r"\s*(.+)\s*")
+
+    identifier = None
+    name = None
+    row_count = 0
+    nucleotides = ["A", "C", "G", "T"]
+    for line in handle:
+        line = line.strip()
+
+        head_match = head_pat.match(line)
+        row_match_long = row_pat_long.match(line)
+        row_match_short = row_pat_short.match(line)
+
+        if head_match:
+            identifier = head_match.group(1)
+            if head_match.group(3):
+                name = head_match.group(3)
+            else:
+                name = identifier
+        elif row_match_long:
+            (letter, counts_str) = row_match_long.group(1, 2)
+            words = counts_str.split()
+            counts[letter] = [float(x) for x in words]
+            row_count += 1
+            if row_count == 4:
+                record.append(Motif(identifier, name, alphabet=alphabet, counts=counts))
+                identifier = None
+                name = None
+                counts = {}
+                row_count = 0
+        elif row_match_short:
+            words = row_match_short.group(1).split()
+            counts[nucleotides[row_count]] = [float(x) for x in words]
+            row_count += 1
+            if row_count == 4:
+                record.append(Motif(identifier, name, alphabet=alphabet, counts=counts))
+                identifier = None
+                name = None
+                counts = {}
+                row_count = 0
+
+    return record
+
+
+def calculate_pseudocounts(motif):
+    """Calculate pseudocounts.
+
+    Computes the square root of the average number of observed sequences
+    (the mean column sum of the counts matrix), multiplied by the
+    background frequency of each nucleotide.
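+
+    For example, a motif compiled from 100 sites with a uniform background
+    gives sqrt(100) * 0.25 = 2.5 pseudocounts for each nucleotide.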
+    """
+    alphabet = motif.alphabet
+    background = motif.background
+
+    # It is possible to have unequal column sums so use the average
+    # number of instances.
+    total = 0
+    for i in range(motif.length):
+        total += sum(float(motif.counts[letter][i]) for letter in alphabet)
+
+    avg_nb_instances = total / motif.length
+    sq_nb_instances = math.sqrt(avg_nb_instances)
+
+    if background:
+        background = dict(background)
+    else:
+        background = dict.fromkeys(sorted(alphabet), 1.0)
+
+    total = sum(background.values())
+    pseudocounts = {}
+
+    for letter in alphabet:
+        background[letter] /= total
+        pseudocounts[letter] = sq_nb_instances * background[letter]
+
+    return pseudocounts
+
+
+def split_jaspar_id(id):
+    """Split a JASPAR matrix ID into its component.
+
+    Components are base ID and version number, e.g. 'MA0047.2' is returned as
+    ('MA0047', 2).
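+
+    For example:
+
+    >>> from Bio.motifs.jaspar import split_jaspar_id
+    >>> split_jaspar_id('MA0047.2')
+    ('MA0047', '2')
+    >>> split_jaspar_id('MA0047')
+    ('MA0047', None)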
+    """
+    id_split = id.split(".")
+
+    base_id = None
+    version = None
+    if len(id_split) == 2:
+        base_id = id_split[0]
+        version = id_split[1]
+    else:
+        base_id = id
+
+    return (base_id, version)
diff --git a/code/lib/Bio/motifs/jaspar/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/motifs/jaspar/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000..ee0d260
Binary files /dev/null and b/code/lib/Bio/motifs/jaspar/__pycache__/__init__.cpython-37.pyc differ
diff --git a/code/lib/Bio/motifs/jaspar/__pycache__/db.cpython-37.pyc b/code/lib/Bio/motifs/jaspar/__pycache__/db.cpython-37.pyc
new file mode 100644
index 0000000..3732919
Binary files /dev/null and b/code/lib/Bio/motifs/jaspar/__pycache__/db.cpython-37.pyc differ
diff --git a/code/lib/Bio/motifs/jaspar/db.py b/code/lib/Bio/motifs/jaspar/db.py
new file mode 100644
index 0000000..fc27305
--- /dev/null
+++ b/code/lib/Bio/motifs/jaspar/db.py
@@ -0,0 +1,776 @@
+# Copyright 2013 by David Arenillas and Anthony Mathelier. All rights reserved.
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+"""Provides read access to a JASPAR5 formatted database.
+
+This module requires MySQLdb to be installed.
+
+Example; substitute your database credentials as
+appropriate::
+
+        from Bio.motifs.jaspar.db import JASPAR5
+        JASPAR_DB_HOST = "hostname.example.org"
+        JASPAR_DB_NAME = "JASPAR2018"
+        JASPAR_DB_USER = "guest"
+        JASPAR_DB_PASS = "guest"
+
+        jdb = JASPAR5(
+            host=JASPAR_DB_HOST,
+            name=JASPAR_DB_NAME,
+            user=JASPAR_DB_USER,
+            password=JASPAR_DB_PASS
+        )
+        ets1 = jdb.fetch_motif_by_id('MA0098')
+        print(ets1)
+    TF name ETS1
+    Matrix ID   MA0098.3
+    Collection  CORE
+    TF class    Tryptophan cluster factors
+    TF family   Ets-related factors
+    Species 9606
+    Taxonomic group vertebrates
+    Accession   ['P14921']
+    Data type used  HT-SELEX
+    Medline 20517297
+    PAZAR ID    TF0000070
+    Comments    Data is from Taipale HTSELEX DBD (2013)
+    Matrix:
+            0      1      2      3      4      5      6      7      8      9
+    A: 2683.00 180.00 425.00   0.00   0.00 2683.00 2683.00 1102.00  89.00 803.00
+    C: 210.00 2683.00 2683.00  21.00   0.00   0.00   9.00  21.00 712.00 401.00
+    G: 640.00 297.00   7.00 2683.00 2683.00   0.00  31.00 1580.00 124.00 1083.00
+    T: 241.00  22.00   0.00   0.00  12.00   0.00 909.00  12.00 1970.00 396.00
+
+        motifs = jdb.fetch_motifs(
+            collection = 'CORE',
+            tax_group = ['vertebrates', 'insects'],
+            tf_class = 'Homeo domain factors',
+            tf_family = ['TALE-type homeo domain factors', 'POU domain factors'],
+            min_ic = 12
+        )
+        for motif in motifs:
+            pass # do something with the motif
+"""
+
+
+import warnings
+from Bio import BiopythonWarning
+from Bio import MissingPythonDependencyError
+
+try:
+    import MySQLdb as mdb
+except ImportError:
+    raise MissingPythonDependencyError(
+        "Install MySQLdb if you want to use Bio.motifs.jaspar.db"
+    )
+
+from Bio.motifs import jaspar, matrix
+
+
+JASPAR_DFLT_COLLECTION = "CORE"
+
+
+class JASPAR5:
+    """Class representing a JASPAR5 database.
+
+    Class representing a JASPAR5 DB. The methods within are loosely based
+    on the perl TFBS::DB::JASPAR5 module.
+
+    Note: We will only implement reading of JASPAR motifs from the DB.
+    Unlike the perl module, we will not attempt to implement any methods to
+    store JASPAR motifs or create a new DB at this time.
+    """
+
+    def __init__(self, host=None, name=None, user=None, password=None):
+        """Construct a JASPAR5 instance and connect to specified DB.
+
+        Arguments:
+         - host - host name of the JASPAR DB server
+         - name - name of the JASPAR database
+         - user - user name to connect to the JASPAR DB
+         - password - JASPAR DB password
+
+        """
+        self.name = name
+        self.host = host
+        self.user = user
+        self.password = password
+
+        self.dbh = mdb.connect(host, user, password, name)
+
+    def __str__(self):
+        """Return a string represention of the JASPAR5 DB connection."""
+        return r"%s\@%s:%s" % (self.user, self.host, self.name)
+
+    def fetch_motif_by_id(self, id):
+        """Fetch a single JASPAR motif from the DB by its JASPAR matrix ID.
+
+        Example id 'MA0001.1'.
+
+        Arguments:
+         - id - JASPAR matrix ID. This may be a fully specified ID including
+                the version number (e.g. MA0049.2) or just the base ID (e.g.
+                MA0049). If only a base ID is provided, the latest version is
+                returned.
+
+        Returns:
+         - A Bio.motifs.jaspar.Motif object
+
+        **NOTE:** The perl TFBS module allows you to specify the type of matrix
+        to return (PFM, PWM, ICM) but matrices are always stored in JASPAR as
+        PFMs so this does not really belong here. Once a PFM is fetched the
+        pwm() and pssm() methods can be called to return the normalized and
+        log-odds matrices.
+
+        """
+        # separate stable ID and version number
+        (base_id, version) = jaspar.split_jaspar_id(id)
+        if not version:
+            # if ID contains no version portion, fetch the latest version
+            version = self._fetch_latest_version(base_id)
+
+        # fetch internal JASPAR matrix ID - also a check for validity
+        int_id = None
+        if version:
+            int_id = self._fetch_internal_id(base_id, version)
+
+        # fetch JASPAR motif using internal ID
+        motif = None
+        if int_id:
+            motif = self._fetch_motif_by_internal_id(int_id)
+
+        return motif
+
+    def fetch_motifs_by_name(self, name):
+        """Fetch a list of JASPAR motifs from a JASPAR DB by the given TF name(s).
+
+        Arguments:
+        name - a single name or list of names
+        Returns:
+        A list of Bio.motifs.jaspar.Motif objects
+
+        Notes:
+        Names are not guaranteed to be unique. There may be more than one
+        motif with the same name. Therefore even if name specifies a single
+        name, a list of motifs is returned. This just calls
+        self.fetch_motifs(collection = None, tf_name = name).
+
+        This behaviour is different from the TFBS perl module's
+        get_Matrix_by_name() method which always returns a single matrix,
+        issuing a warning message and returning the first matrix retrieved
+        in the case where multiple matrices have the same name.
+
+        """
+        return self.fetch_motifs(collection=None, tf_name=name)
+
+    def fetch_motifs(
+        self,
+        collection=JASPAR_DFLT_COLLECTION,
+        tf_name=None,
+        tf_class=None,
+        tf_family=None,
+        matrix_id=None,
+        tax_group=None,
+        species=None,
+        pazar_id=None,
+        data_type=None,
+        medline=None,
+        min_ic=0,
+        min_length=0,
+        min_sites=0,
+        all=False,
+        all_versions=False,
+    ):
+        """Fetch jaspar.Record (list) of motifs using selection criteria.
+
+        Arguments::
+
+            Except where obvious, all selection criteria arguments may be
+            specified as a single value or a list of values. Motifs must
+            meet ALL the specified selection criteria to be returned with
+                          the precedence exceptions noted below.
+
+            all         - Takes precedence over all other selection criteria.
+                          Every motif is returned. If 'all_versions' is also
+                          specified, all versions of every motif are returned,
+                          otherwise just the latest version of every motif is
+                          returned.
+            matrix_id   - Takes precedence over all other selection criteria
+                          except 'all'.  Only motifs with the given JASPAR
+                          matrix ID(s) are returned. A matrix ID may be
+                          specified as just a base ID or full JASPAR IDs
+                          including version number. If only a base ID is
+                          provided for specific motif(s), then just the latest
+                          version of those motif(s) are returned unless
+                          'all_versions' is also specified.
+            collection  - Only motifs from the specified JASPAR collection(s)
+                          are returned. NOTE - if not specified, the collection
+                          defaults to CORE for all other selection criteria
+                          except 'all' and 'matrix_id'. To apply the other
+                          selection criteria across all JASPAR collections,
+                          explicitly set collection=None.
+            tf_name     - Only motifs with the given name(s) are returned.
+            tf_class    - Only motifs of the given TF class(es) are returned.
+            tf_family   - Only motifs from the given TF families are returned.
+            tax_group   - Only motifs belonging to the given taxonomic
+                          supergroups are returned (e.g. 'vertebrates',
+                          'insects', 'nematodes' etc.)
+            species     - Only motifs derived from the given species are
+                          returned.  Species are specified as taxonomy IDs.
+            data_type   - Only motifs generated with the given data type (e.g.
+                          ('ChIP-seq', 'PBM', 'SELEX' etc.) are returned.
+                          NOTE - must match exactly as stored in the database.
+            pazar_id    - Only motifs with the given PAZAR TF ID are returned.
+            medline     - Only motifs with the given medline (PubMed IDs) are
+                          returned.
+            min_ic      - Only motifs whose profile matrices have at least this
+                          information content (specificity) are returned.
+            min_length  - Only motifs whose profiles are of at least this
+                          length are returned.
+            min_sites   - Only motifs compiled from at least this many binding
+                          sites are returned.
+            all_versions- Unless specified, just the latest version of motifs
+                          determined by the other selection criteria are
+                          returned. Otherwise all versions of the selected
+                          motifs are returned.
+
+        Returns:
+            - A Bio.motifs.jaspar.Record (list) of motifs.
+
+        """
+        # Fetch the internal IDs of the motifs using the criteria provided
+        int_ids = self._fetch_internal_id_list(
+            collection=collection,
+            tf_name=tf_name,
+            tf_class=tf_class,
+            tf_family=tf_family,
+            matrix_id=matrix_id,
+            tax_group=tax_group,
+            species=species,
+            pazar_id=pazar_id,
+            data_type=data_type,
+            medline=medline,
+            all=all,
+            all_versions=all_versions,
+        )
+
+        record = jaspar.Record()
+
+        """
+        Now further filter motifs returned above based on any specified
+        matrix specific criteria.
+        """
+        for int_id in int_ids:
+            motif = self._fetch_motif_by_internal_id(int_id)
+
+            # Filter motifs to those with matrix IC greater than min_ic
+            if min_ic:
+                if motif.pssm.mean() < min_ic:
+                    continue
+
+            # Filter motifs to those with minimum length of min_length
+            if min_length:
+                if motif.length < min_length:
+                    continue
+
+            # XXX We could also supply a max_length filter.
+
+            """
+            Filter motifs to those composed of at least this many sites.
+            The perl TFBS module assumes column sums may be different but
+            this should be strictly enforced here we will ignore this and
+            just use the first column sum.
+            """
+            if min_sites:
+                num_sites = sum(motif.counts[nt][0] for nt in motif.alphabet)
+                if num_sites < min_sites:
+                    continue
+
+            record.append(motif)
+
+        return record
+
+    def _fetch_latest_version(self, base_id):
+        """Get the latest version number for the given base_id (PRIVATE)."""
+        cur = self.dbh.cursor()
+        cur.execute(
+            "select VERSION from MATRIX where BASE_id = %s order by VERSION"
+            " desc limit 1",
+            (base_id,),
+        )
+
+        row = cur.fetchone()
+
+        latest = None
+        if row:
+            latest = row[0]
+        else:
+            warnings.warn(
+                "Failed to fetch latest version number for JASPAR motif"
+                f" with base ID '{base_id}'. No JASPAR motif with this"
+                " base ID appears to exist in the database.",
+                BiopythonWarning,
+            )
+
+        return latest
+
+    def _fetch_internal_id(self, base_id, version):
+        """Fetch the internal id for a base id + version (PRIVATE).
+
+        Also checks if this combo exists or not.
+        """
+        cur = self.dbh.cursor()
+        cur.execute(
+            "select id from MATRIX where BASE_id = %s and VERSION = %s",
+            (base_id, version),
+        )
+
+        row = cur.fetchone()
+
+        int_id = None
+        if row:
+            int_id = row[0]
+        else:
+            warnings.warn(
+                "Failed to fetch internal database ID for JASPAR motif"
+                f" with matrix ID '{base_id}.{version}'. No JASPAR motif"
+                " with this matrix ID appears to exist.",
+                BiopythonWarning,
+            )
+
+        return int_id
+
+    def _fetch_motif_by_internal_id(self, int_id):
+        """Fetch basic motif information (PRIVATE)."""
+        cur = self.dbh.cursor()
+        cur.execute(
+            "select BASE_ID, VERSION, COLLECTION, NAME from MATRIX where id = %s",
+            (int_id,),
+        )
+
+        row = cur.fetchone()
+
+        # This should never happen as it is an internal method. If it does
+        # we should probably raise an exception
+        if not row:
+            warnings.warn(
+                f"Could not fetch JASPAR motif with internal ID = {int_id}",
+                BiopythonWarning,
+            )
+            return None
+
+        base_id = row[0]
+        version = row[1]
+        collection = row[2]
+        name = row[3]
+
+        matrix_id = "".join([base_id, ".", str(version)])
+
+        # fetch the counts matrix
+        counts = self._fetch_counts_matrix(int_id)
+
+        # Create new JASPAR motif
+        motif = jaspar.Motif(matrix_id, name, collection=collection, counts=counts)
+
+        # fetch species
+        cur.execute("select TAX_ID from MATRIX_SPECIES where id = %s", (int_id,))
+        tax_ids = []
+        rows = cur.fetchall()
+        for row in rows:
+            tax_ids.append(row[0])
+
+        # Many JASPAR motifs (especially those not in the CORE collection)
+        # do not have taxonomy IDs. So this warning would get annoying.
+        # if not tax_ids:
+        #     warnings.warn("Could not fetch any taxonomy IDs for JASPAR motif"
+        #                   " {0}".format(motif.matrix_id), BiopythonWarning)
+
+        motif.species = tax_ids
+
+        # fetch protein accession numbers
+        cur.execute("select ACC FROM MATRIX_PROTEIN where id = %s", (int_id,))
+        accs = []
+        rows = cur.fetchall()
+        for row in rows:
+            accs.append(row[0])
+
+        # Similarly as for taxonomy IDs, it would get annoying to print
+        # warnings for JASPAR motifs which do not have accession numbers.
+
+        motif.acc = accs
+
+        # fetch remaining annotation as tags from the ANNOTATION table
+        cur.execute("select TAG, VAL from MATRIX_ANNOTATION where id = %s", (int_id,))
+        rows = cur.fetchall()
+        for row in rows:
+            attr = row[0]
+            val = row[1]
+            if attr == "class":
+                motif.tf_class = val
+            elif attr == "family":
+                motif.tf_family = val
+            elif attr == "tax_group":
+                motif.tax_group = val
+            elif attr == "type":
+                motif.data_type = val
+            elif attr == "pazar_tf_id":
+                motif.pazar_id = val
+            elif attr == "medline":
+                motif.medline = val
+            elif attr == "comment":
+                motif.comment = val
+            else:
+                # TODO If we were to implement additional arbitrary tags
+                # motif.tag(attr, val)
+                pass
+
+        return motif
+
+    def _fetch_counts_matrix(self, int_id):
+        """Fetch the counts matrix from the JASPAR DB by the internal ID (PRIVATE).
+
+        Returns a Bio.motifs.matrix.GenericPositionMatrix
+        """
+        counts = {}
+        cur = self.dbh.cursor()
+
+        for base in "ACGT":
+            base_counts = []
+
+            cur.execute(
+                "select val from MATRIX_DATA where ID = %s and row = %s order by col",
+                (int_id, base),
+            )
+
+            rows = cur.fetchall()
+            for row in rows:
+                base_counts.append(row[0])
+
+            counts[base] = [float(x) for x in base_counts]
+
+        return matrix.GenericPositionMatrix("ACGT", counts)
+
+    def _fetch_internal_id_list(
+        self,
+        collection=JASPAR_DFLT_COLLECTION,
+        tf_name=None,
+        tf_class=None,
+        tf_family=None,
+        matrix_id=None,
+        tax_group=None,
+        species=None,
+        pazar_id=None,
+        data_type=None,
+        medline=None,
+        all=False,
+        all_versions=False,
+    ):
+        """Fetch list of internal JASPAR motif IDs.
+
+        Fetch a list of internal JASPAR motif IDs based on various passed
+        parameters which may then be used to fetch the rest of the motif data.
+
+        Caller:
+            fetch_motifs()
+
+        Arguments:
+            See arguments sections of fetch_motifs()
+
+        Returns:
+            A list of internal JASPAR motif IDs which match the given
+            selection criteria arguments.
+
+
+        Build an SQL query based on the selection arguments provided.
+
+        1: First add table joins and sub-clauses for criteria corresponding to
+           named fields from the MATRIX and MATRIX_SPECIES tables such as
+           collection, matrix ID, name, species etc.
+
+        2: Then add joins/sub-clauses for tag/value parameters from the
+           MATRIX_ANNOTATION table.
+
+        For the surviving matrices, the responsibility to do matrix-based
+        feature filtering such as ic, number of sites etc, fall on the
+        calling fetch_motifs() method.
+
+        """
+        int_ids = []
+
+        cur = self.dbh.cursor()
+
+        """
+        Special case 1: fetch ALL motifs. Highest priority.
+        Ignore all other selection arguments.
+        """
+        if all:
+            cur.execute("select ID from MATRIX")
+            rows = cur.fetchall()
+
+            for row in rows:
+                int_ids.append(row[0])
+
+            return int_ids
+
+        """
+        Special case 2: fetch specific motifs by their JASPAR IDs. This
+        has higher priority than any other except the above 'all' case.
+        Ignore all other selection arguments.
+        """
+        if matrix_id:
+            """
+            These might be either stable IDs or stable_ID.version.
+            If just a stable ID is given and all_versions is set, return
+            all versions; otherwise just the latest.
+            """
+            if all_versions:
+                for id in matrix_id:
+                    # ignore the version here; this is just a sanity check on the ID format
+                    (base_id, version) = jaspar.split_jaspar_id(id)
+                    cur.execute("select ID from MATRIX where BASE_ID = %s", (base_id,))
+
+                    rows = cur.fetchall()
+                    for row in rows:
+                        int_ids.append(row[0])
+            else:
+                # only the latest version, or the requested version
+                for id in matrix_id:
+                    (base_id, version) = jaspar.split_jaspar_id(id)
+
+                    if not version:
+                        version = self._fetch_latest_version(base_id)
+
+                    int_id = None
+                    if version:
+                        int_id = self._fetch_internal_id(base_id, version)
+
+                    if int_id:
+                        int_ids.append(int_id)
+
+            return int_ids
+
+        tables = ["MATRIX m"]
+        where_clauses = []
+
+        # Select by MATRIX.COLLECTION
+        if collection:
+            if isinstance(collection, list):
+                # Multiple collections passed in as a list
+                clause = "m.COLLECTION in ('"
+                clause = "".join([clause, "','".join(collection)])
+                clause = "".join([clause, "')"])
+            else:
+                # A single collection - typical usage
+                clause = "m.COLLECTION = '%s'" % collection
+
+            where_clauses.append(clause)
+
+        # Select by MATRIX.NAME
+        if tf_name:
+            if isinstance(tf_name, list):
+                # Multiple names passed in as a list
+                clause = "m.NAME in ('"
+                clause = "".join([clause, "','".join(tf_name)])
+                clause = "".join([clause, "')"])
+            else:
+                # A single name
+                clause = "m.NAME = '%s'" % tf_name
+
+            where_clauses.append(clause)
+
+        # Select by MATRIX_SPECIES.TAX_ID
+        if species:
+            tables.append("MATRIX_SPECIES ms")
+            where_clauses.append("m.ID = ms.ID")
+
+            """
+            NOTE: species are numeric taxonomy IDs but stored as varchars
+            in the DB.
+            """
+            if isinstance(species, list):
+                # Multiple tax IDs passed in as a list
+                clause = "ms.TAX_ID in ('"
+                clause = "".join([clause, "','".join(str(s) for s in species)])
+                clause = "".join([clause, "')"])
+            else:
+                # A single tax ID
+                clause = "ms.TAX_ID = '%s'" % species
+
+            where_clauses.append(clause)
+
+        """
+        Tag-based selection from MATRIX_ANNOTATION.
+        This differs from the perl TFBS module, whose matrix class has a
+        generic tag attribute corresponding to the tags in the database;
+        that provides tremendous flexibility for adding new tags to the DB
+        and selecting on those tags without adding new code. In the JASPAR
+        Motif class we have elected to use specific attributes for the most
+        commonly used tags, and correspondingly we only allow selection on
+        these attributes here.
+
+        The attributes corresponding to the tags for which selection is
+        provided are:
+
+           Attribute   Tag
+           tf_class    class
+           tf_family   family
+           pazar_id    pazar_tf_id
+           medline     medline
+           data_type   type
+           tax_group   tax_group
+        """
+
+        # Select by TF class(es) (MATRIX_ANNOTATION.TAG="class")
+        if tf_class:
+            tables.append("MATRIX_ANNOTATION ma1")
+            where_clauses.append("m.ID = ma1.ID")
+
+            clause = "ma1.TAG = 'class'"
+            if isinstance(tf_class, list):
+                # A list of TF classes
+                clause = "".join([clause, " and ma1.VAL in ('"])
+                clause = "".join([clause, "','".join(tf_class)])
+                clause = "".join([clause, "')"])
+            else:
+                # A single TF class
+                clause = "".join([clause, " and ma1.VAL = '%s' " % tf_class])
+
+            where_clauses.append(clause)
+
+        # Select by TF families (MATRIX_ANNOTATION.TAG="family")
+        if tf_family:
+            tables.append("MATRIX_ANNOTATION ma2")
+            where_clauses.append("m.ID = ma2.ID")
+
+            clause = "ma2.TAG = 'family'"
+            if isinstance(tf_family, list):
+                # A list of TF families
+                clause = "".join([clause, " and ma2.VAL in ('"])
+                clause = "".join([clause, "','".join(tf_family)])
+                clause = "".join([clause, "')"])
+            else:
+                # A single TF family
+                clause = "".join([clause, " and ma2.VAL = '%s' " % tf_family])
+
+            where_clauses.append(clause)
+
+        # Select by PAZAR TF ID(s) (MATRIX_ANNOTATION.TAG="pazar_tf_id")
+        if pazar_id:
+            tables.append("MATRIX_ANNOTATION ma3")
+            where_clauses.append("m.ID = ma3.ID")
+
+            clause = "ma3.TAG = 'pazar_tf_id'"
+            if isinstance(pazar_id, list):
+                # A list of PAZAR IDs
+                clause = "".join([clause, " and ma3.VAL in ('"])
+                clause = "".join([clause, "','".join(pazar_id)])
+                clause = "".join([clause, "')"])
+            else:
+                # A single PAZAR ID
+                clause = "".join([" and ma3.VAL = '%s' " % pazar_id])
+
+            where_clauses.append(clause)
+
+        # Select by PubMed ID(s) (MATRIX_ANNOTATION.TAG="medline")
+        if medline:
+            tables.append("MATRIX_ANNOTATION ma4")
+            where_clauses.append("m.ID = ma4.ID")
+
+            clause = "ma4.TAG = 'medline'"
+            if isinstance(medline, list):
+                # A list of PubMed IDs
+                clause = "".join([clause, " and ma4.VAL in ('"])
+                clause = "".join([clause, "','".join(medline)])
+                clause = "".join([clause, "')"])
+            else:
+                # A single PubMed ID
+                clause = "".join([" and ma4.VAL = '%s' " % medline])
+
+            where_clauses.append(clause)
+
+        # Select by data type(s) used to compile the matrix
+        # (MATRIX_ANNOTATION.TAG="type")
+        if data_type:
+            tables.append("MATRIX_ANNOTATION ma5")
+            where_clauses.append("m.ID = ma5.ID")
+
+            clause = "ma5.TAG = 'type'"
+            if isinstance(data_type, list):
+                # A list of data types
+                clause = "".join([clause, " and ma5.VAL in ('"])
+                clause = "".join([clause, "','".join(data_type)])
+                clause = "".join([clause, "')"])
+            else:
+                # A single data type
+                clause = "".join([" and ma5.VAL = '%s' " % data_type])
+
+            where_clauses.append(clause)
+
+        # Select by taxonomic supergroup(s) (MATRIX_ANNOTATION.TAG="tax_group")
+        if tax_group:
+            tables.append("MATRIX_ANNOTATION ma6")
+            where_clauses.append("m.ID = ma6.ID")
+
+            clause = "ma6.TAG = 'tax_group'"
+            if isinstance(tax_group, list):
+                # A list of tax IDs
+                clause = "".join([clause, " and ma6.VAL in ('"])
+                clause = "".join([clause, "','".join(tax_group)])
+                clause = "".join([clause, "')"])
+            else:
+                # A single tax ID
+                clause = "".join([clause, " and ma6.VAL = '%s' " % tax_group])
+
+            where_clauses.append(clause)
+
+        sql = "".join(["select distinct(m.ID) from ", ", ".join(tables)])
+
+        if where_clauses:
+            sql = "".join([sql, " where ", " and ".join(where_clauses)])
+
+        # print("sql = %s" % sql)
+
+        cur.execute(sql)
+        rows = cur.fetchall()
+
+        for row in rows:
+            id = row[0]
+            if all_versions:
+                int_ids.append(id)
+            else:
+                # is the latest version?
+                if self._is_latest_version(id):
+                    int_ids.append(id)
+
+        if len(int_ids) < 1:
+            warnings.warn(
+                "Zero motifs returned with current select critera", BiopythonWarning
+            )
+
+        return int_ids
+
+    def _is_latest_version(self, int_id):
+        """Check if the internal ID represents the latest JASPAR matrix (PRIVATE).
+
+        Does this internal ID represent the latest version of the JASPAR
+        matrix (collapse on base ids)
+        """
+        cur = self.dbh.cursor()
+
+        cur.execute(
+            "select count(*) from MATRIX where "
+            "BASE_ID = (select BASE_ID from MATRIX where ID = %s) "
+            "and VERSION > (select VERSION from MATRIX where ID = %s)",
+            (int_id, int_id),
+        )
+
+        row = cur.fetchone()
+
+        count = row[0]
+
+        if count == 0:
+            # no matrices with higher version ID and same base id
+            return True
+
+        return False
diff --git a/code/lib/Bio/motifs/mast.py b/code/lib/Bio/motifs/mast.py
new file mode 100644
index 0000000..face5b9
--- /dev/null
+++ b/code/lib/Bio/motifs/mast.py
@@ -0,0 +1,133 @@
+# Copyright 2008 by Bartek Wilczynski.
+# Adapted from Bio.MEME.Parser by Jason A. Hackney.  All rights reserved.
+# This code is part of the Biopython distribution and governed by its
+# license.  Please see the LICENSE file that should have been included
+# as part of this package.
+"""Module for the support of Motif Alignment and Search Tool (MAST)."""
+
+import xml.etree.ElementTree as ET
+
+from Bio.motifs import meme
+
+
+class Record(list):
+    """The class for holding the results from a MAST run.
+
+    A mast.Record holds data about matches between motifs and sequences.
+    The motifs held by the Record are objects of the class meme.Motif.
+
+    The mast.Record class inherits from list, so you can access individual
+    motifs in the record by their index. Alternatively, you can find a motif
+    by its name:
+
+    >>> from Bio import motifs
+    >>> with open("motifs/mast.crp0.de.oops.txt.xml") as f:
+    ...     record = motifs.parse(f, 'MAST')
+    >>> motif = record[0]
+    >>> print(motif.name)
+    1
+    >>> motif = record['1']
+    >>> print(motif.name)
+    1
+    """
+
+    def __init__(self):
+        """Initialize the class."""
+        self.sequences = []
+        self.version = ""
+        self.database = ""
+        self.diagrams = {}
+        self.alphabet = None
+        self.strand_handling = ""
+
+    def __getitem__(self, key):
+        """Return the motif of index key."""
+        if isinstance(key, str):
+            for motif in self:
+                if motif.name == key:
+                    return motif
+        else:
+            return list.__getitem__(self, key)
+
+
+def read(handle):
+    """Parse a MAST XML format handle as a Record object."""
+    record = Record()
+    try:
+        xml_tree = ET.parse(handle)
+    except ET.ParseError:
+        raise ValueError(
+            "Improper MAST XML input file. XML root tag should start with  maximum:
+                    maximum = count
+                    sequence_letter = letter
+            sequence += sequence_letter
+        return Seq(sequence)
+
+    @property
+    def anticonsensus(self):
+        """Return the anticonsensus sequence."""
+        sequence = ""
+        for i in range(self.length):
+            minimum = math.inf
+            for letter in self.alphabet:
+                count = self[letter][i]
+                if count < minimum:
+                    minimum = count
+                    sequence_letter = letter
+            sequence += sequence_letter
+        return Seq(sequence)
+
+    @property
+    def degenerate_consensus(self):
+        """Return the degenerate consensus sequence."""
+        # Following the rules adapted from
+        # D. R. Cavener: "Comparison of the consensus sequence flanking
+        # translational start sites in Drosophila and vertebrates."
+        # Nucleic Acids Research 15(4): 1353-1361. (1987).
+        # The same rules are used by TRANSFAC.
+        degenerate_nucleotide = {
+            "A": "A",
+            "C": "C",
+            "G": "G",
+            "T": "T",
+            "AC": "M",
+            "AG": "R",
+            "AT": "W",
+            "CG": "S",
+            "CT": "Y",
+            "GT": "K",
+            "ACG": "V",
+            "ACT": "H",
+            "AGT": "D",
+            "CGT": "B",
+            "ACGT": "N",
+        }
+        sequence = ""
+        for i in range(self.length):
+
+            def get(nucleotide):
+                return self[nucleotide][i]
+
+            nucleotides = sorted(self, key=get, reverse=True)
+            counts = [self[c][i] for c in nucleotides]
+            # Follow the Cavener rules:
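+            # 1. use a single letter if its count exceeds the sum of the
+            #    others and is more than twice the runner-up;
+            # 2. use a two-letter IUPAC code if the top two counts exceed
+            #    75% of the total;
+            # 3. use a three-letter code if the fourth letter is absent;
+            # 4. otherwise use N.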
+            if counts[0] > sum(counts[1:]) and counts[0] > 2 * counts[1]:
+                key = nucleotides[0]
+            elif 4 * sum(counts[:2]) > 3 * sum(counts):
+                key = "".join(sorted(nucleotides[:2]))
+            elif counts[3] == 0:
+                key = "".join(sorted(nucleotides[:3]))
+            else:
+                key = "ACGT"
+            nucleotide = degenerate_nucleotide.get(key, key)
+            sequence += nucleotide
+        return Seq(sequence)
+
+    @property
+    def gc_content(self):
+        """Compute the fraction GC content."""
+        alphabet = self.alphabet
+        gc_total = 0.0
+        total = 0.0
+        for i in range(self.length):
+            for letter in alphabet:
+                if letter in "CG":
+                    gc_total += self[letter][i]
+                total += self[letter][i]
+        return gc_total / total
+
+    def reverse_complement(self):
+        """Compute reverse complement."""
+        values = {}
+        if self.alphabet == "ACGU":
+            values["A"] = self["U"][::-1]
+            values["U"] = self["A"][::-1]
+        else:
+            values["A"] = self["T"][::-1]
+            values["T"] = self["A"][::-1]
+        values["G"] = self["C"][::-1]
+        values["C"] = self["G"][::-1]
+        alphabet = self.alphabet
+        return self.__class__(alphabet, values)
+
+
+class FrequencyPositionMatrix(GenericPositionMatrix):
+    """Class for the support of frequency calculations on the Position Matrix."""
+
+    def normalize(self, pseudocounts=None):
+        """Create and return a position-weight matrix by normalizing the counts matrix.
+
+        If pseudocounts is None (default), no pseudocounts are added
+        to the counts.
+
+        If pseudocounts is a number, it is added to the counts before
+        calculating the position-weight matrix.
+
+        Alternatively, the pseudocounts can be a dictionary with a key
+        for each letter in the alphabet associated with the motif.
+        """
+        counts = {}
+        if pseudocounts is None:
+            for letter in self.alphabet:
+                counts[letter] = [0.0] * self.length
+        elif isinstance(pseudocounts, dict):
+            for letter in self.alphabet:
+                counts[letter] = [float(pseudocounts[letter])] * self.length
+        else:
+            for letter in self.alphabet:
+                counts[letter] = [float(pseudocounts)] * self.length
+        for i in range(self.length):
+            for letter in self.alphabet:
+                counts[letter][i] += self[letter][i]
+        # Actual normalization is done in the PositionWeightMatrix initializer
+        return PositionWeightMatrix(self.alphabet, counts)
+
+
+class PositionWeightMatrix(GenericPositionMatrix):
+    """Class for the support of weight calculations on the Position Matrix."""
+
+    def __init__(self, alphabet, counts):
+        """Initialize the class."""
+        GenericPositionMatrix.__init__(self, alphabet, counts)
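+        # Normalize each column so the letter frequencies at each position
+        # sum to 1.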
+        for i in range(self.length):
+            total = sum(float(self[letter][i]) for letter in alphabet)
+            for letter in alphabet:
+                self[letter][i] /= total
+        for letter in alphabet:
+            self[letter] = tuple(self[letter])
+
+    def log_odds(self, background=None):
+        """Return the Position-Specific Scoring Matrix.
+
+        The Position-Specific Scoring Matrix (PSSM) contains the log-odds
+        scores computed from the probability matrix and the background
+        probabilities. If the background is None, a uniform background
+        distribution is assumed.
+        """
+        values = {}
+        alphabet = self.alphabet
+        if background is None:
+            background = dict.fromkeys(self.alphabet, 1.0)
+        else:
+            background = dict(background)
+        total = sum(background.values())
+        for letter in alphabet:
+            background[letter] /= total
+            values[letter] = []
+        for i in range(self.length):
+            for letter in alphabet:
+                b = background[letter]
+                if b > 0:
+                    p = self[letter][i]
+                    if p > 0:
+                        logodds = math.log(p / b, 2)
+                    else:
+                        logodds = -math.inf
+                else:
+                    p = self[letter][i]
+                    if p > 0:
+                        logodds = math.inf
+                    else:
+                        logodds = math.nan
+                values[letter].append(logodds)
+        pssm = PositionSpecificScoringMatrix(alphabet, values)
+        return pssm
+
+
+class PositionSpecificScoringMatrix(GenericPositionMatrix):
+    """Class for the support of Position Specific Scoring Matrix calculations."""
+
+    def calculate(self, sequence):
+        """Return the PWM score for a given sequence for all positions.
+
+        Notes:
+         - the sequence can only be a DNA sequence
+         - the search is performed only on one strand
+         - if the sequence and the motif have the same length, a single
+           number is returned
+         - otherwise, the result is a one-dimensional numpy array
+
+        """
+        # TODO - Code itself tolerates ambiguous bases (as NaN).
+        if sorted(self.alphabet) != ["A", "C", "G", "T"]:
+            raise ValueError(
+                "PSSM has wrong alphabet: %s - Use only with DNA motifs" % self.alphabet
+            )
+
+        # NOTE: The C code handles mixed case input as this could be large
+        # (e.g. contig or chromosome), so requiring it be all upper or lower
+        # case would impose an overhead to allocate the extra memory.
+        try:
+            sequence = bytes(sequence)
+        except TypeError:  # str
+            try:
+                sequence = bytes(sequence, "ASCII")
+            except TypeError:
+                raise ValueError(
+                    "sequence should be a Seq, MutableSeq, string, or bytes-like object"
+                ) from None
+            except UnicodeEncodeError:
+                raise ValueError(
+                    "sequence should contain ASCII characters only"
+                ) from None
+        except Exception:
+            raise ValueError(
+                "sequence should be a Seq, MutableSeq, string, or bytes-like object"
+            ) from None
+
+        n = len(sequence)
+        m = self.length
+        # Create the numpy arrays here; the C module then does not rely on numpy
+        # Use a float32 for the scores array to save space
+        scores = np.empty(n - m + 1, np.float32)
+        logodds = np.array(
+            [[self[letter][i] for letter in "ACGT"] for i in range(m)], float
+        )
+        _pwm.calculate(sequence, logodds, scores)
+
+        if len(scores) == 1:
+            return scores[0]
+        else:
+            return scores
+
+    def search(self, sequence, threshold=0.0, both=True, chunksize=10 ** 6):
+        """Find hits with PWM score above given threshold.
+
+        A generator function, returning found hits in the given sequence
+        with the pwm score higher than the threshold.
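+
+        With both=True (the default), the reverse strand is searched as
+        well; reverse-strand hits are reported with negative positions,
+        counted back from the end of the sequence.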
+        """
+        sequence = sequence.upper()
+        seq_len = len(sequence)
+        motif_l = self.length
+        chunk_starts = np.arange(0, seq_len, chunksize)
+        if both:
+            rc = self.reverse_complement()
+        for chunk_start in chunk_starts:
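+            # Extend each chunk by motif_l - 1 positions so that hits
+            # spanning a chunk boundary are not missed.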
+            subseq = sequence[chunk_start : chunk_start + chunksize + motif_l - 1]
+            pos_scores = self.calculate(subseq)
+            pos_ind = pos_scores >= threshold
+            pos_positions = np.where(pos_ind)[0] + chunk_start
+            pos_scores = pos_scores[pos_ind]
+            if both:
+                neg_scores = rc.calculate(subseq)
+                neg_ind = neg_scores >= threshold
+                neg_positions = np.where(neg_ind)[0] + chunk_start
+                neg_scores = neg_scores[neg_ind]
+            else:
+                neg_positions = np.empty((0), dtype=int)
+                neg_scores = np.empty((0), dtype=int)
+            chunk_positions = np.append(pos_positions, neg_positions - seq_len)
+            chunk_scores = np.append(pos_scores, neg_scores)
+            order = np.argsort(np.append(pos_positions, neg_positions))
+            chunk_positions = chunk_positions[order]
+            chunk_scores = chunk_scores[order]
+            yield from zip(chunk_positions, chunk_scores)
+
+    @property
+    def max(self):
+        """Maximal possible score for this motif.
+
+        Returns the score computed for the consensus sequence.
+        """
+        score = 0.0
+        letters = self.alphabet
+        for position in range(0, self.length):
+            score += max(self[letter][position] for letter in letters)
+        return score
+
+    @property
+    def min(self):
+        """Minimal possible score for this motif.
+
+        Returns the score computed for the anticonsensus sequence.
+        """
+        score = 0.0
+        letters = self.alphabet
+        for position in range(0, self.length):
+            score += min(self[letter][position] for letter in letters)
+        return score
+
+    @property
+    def gc_content(self):
+        """Compute the GC-ratio."""
+        raise Exception("Cannot compute the %GC composition of a PSSM")
+
+    def mean(self, background=None):
+        """Return expected value of the score of a motif."""
+        if background is None:
+            background = dict.fromkeys(self.alphabet, 1.0)
+        else:
+            background = dict(background)
+        total = sum(background.values())
+        for letter in self.alphabet:
+            background[letter] /= total
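+        # E[score] = sum over positions and letters of p * logodds, where
+        # p = b * 2**logodds is the motif's probability of the letter
+        # (since logodds = log2(p / b)).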
+        sx = 0.0
+        for i in range(self.length):
+            for letter in self.alphabet:
+                logodds = self[letter, i]
+                if math.isnan(logodds):
+                    continue
+                if math.isinf(logodds) and logodds < 0:
+                    continue
+                b = background[letter]
+                p = b * math.pow(2, logodds)
+                sx += p * logodds
+        return sx
+
+    def std(self, background=None):
+        """Return standard deviation of the score of a motif."""
+        if background is None:
+            background = dict.fromkeys(self.alphabet, 1.0)
+        else:
+            background = dict(background)
+        total = sum(background.values())
+        for letter in self.alphabet:
+            background[letter] /= total
+        variance = 0.0
+        for i in range(self.length):
+            sx = 0.0
+            sxx = 0.0
+            for letter in self.alphabet:
+                logodds = self[letter, i]
+                if math.isnan(logodds):
+                    continue
+                if math.isinf(logodds) and logodds < 0:
+                    continue
+                b = background[letter]
+                p = b * math.pow(2, logodds)
+                sx += p * logodds
+                sxx += p * logodds * logodds
+            sxx -= sx * sx
+            variance += sxx
+        variance = max(variance, 0)  # to avoid roundoff problems
+        return math.sqrt(variance)
+
+    def dist_pearson(self, other):
+        """Return the similarity score based on pearson correlation for the given motif against self.
+
+        We use the Pearson's correlation of the respective probabilities.
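+
+        All relative offsets of the two motifs are tried and the highest
+        correlation kept; the returned value is (1 - max correlation,
+        offset at the maximum), so 0 indicates a perfect match.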
+        """
+        if self.alphabet != other.alphabet:
+            raise ValueError("Cannot compare motifs with different alphabets")
+
+        max_p = -2
+        for offset in range(-self.length + 1, other.length):
+            if offset < 0:
+                p = self.dist_pearson_at(other, -offset)
+            else:  # offset>=0
+                p = other.dist_pearson_at(self, offset)
+            if max_p < p:
+                max_p = p
+                max_o = -offset
+        return 1 - max_p, max_o
+
+    def dist_pearson_at(self, other, offset):
+        """Return the similarity score based on pearson correlation at the given offset."""
+        letters = self.alphabet
+        sx = 0.0  # \sum x
+        sy = 0.0  # \sum y
+        sxx = 0.0  # \sum x^2
+        sxy = 0.0  # \sum x \cdot y
+        syy = 0.0  # \sum y^2
+        norm = max(self.length, offset + other.length) * len(letters)
+        for pos in range(min(self.length - offset, other.length)):
+            xi = [self[letter, pos + offset] for letter in letters]
+            yi = [other[letter, pos] for letter in letters]
+            sx += sum(xi)
+            sy += sum(yi)
+            sxx += sum(x * x for x in xi)
+            sxy += sum(x * y for x, y in zip(xi, yi))
+            syy += sum(y * y for y in yi)
+        sx /= norm
+        sy /= norm
+        sxx /= norm
+        sxy /= norm
+        syy /= norm
+        numerator = sxy - sx * sy
+        denominator = math.sqrt((sxx - sx * sx) * (syy - sy * sy))
+        return numerator / denominator
+
+    def distribution(self, background=None, precision=10 ** 3):
+        """Calculate the distribution of the scores at the given precision."""
+        from .thresholds import ScoreDistribution
+
+        if background is None:
+            background = dict.fromkeys(self.alphabet, 1.0)
+        else:
+            background = dict(background)
+        total = sum(background.values())
+        for letter in self.alphabet:
+            background[letter] /= total
+        return ScoreDistribution(precision=precision, pssm=self, background=background)
diff --git a/code/lib/Bio/motifs/meme.py b/code/lib/Bio/motifs/meme.py
new file mode 100644
index 0000000..25ee0b4
--- /dev/null
+++ b/code/lib/Bio/motifs/meme.py
@@ -0,0 +1,195 @@
+# Copyright 2008 by Bartek Wilczynski.
+# Revisions copyright 2019 by Victor Lin.
+# Adapted from  Bio.MEME.Parser by Jason A. Hackney.  All rights reserved.
+# This code is part of the Biopython distribution and governed by its
+# license.  Please see the LICENSE file that should have been included
+# as part of this package.
+"""Module for the support of MEME motif format."""
+
+import xml.etree.ElementTree as ET
+
+from Bio import Seq
+from Bio import motifs
+
+
+def read(handle):
+    """Parse the text output of the MEME program into a meme.Record object.
+
+    Examples
+    --------
+    >>> from Bio.motifs import meme
+    >>> with open("motifs/meme.INO_up800.classic.oops.xml") as f:
+    ...     record = meme.read(f)
+    >>> for motif in record:
+    ...     for instance in motif.instances:
+    ...         print(instance.motif_name, instance.sequence_name, instance.sequence_id, instance.strand, instance.pvalue)
+    GSKGCATGTGAAA INO1 sequence_5 + 1.21e-08
+    GSKGCATGTGAAA FAS1 sequence_2 - 1.87e-08
+    GSKGCATGTGAAA ACC1 sequence_4 - 6.62e-08
+    GSKGCATGTGAAA CHO2 sequence_1 - 1.05e-07
+    GSKGCATGTGAAA CHO1 sequence_0 - 1.69e-07
+    GSKGCATGTGAAA FAS2 sequence_3 - 5.62e-07
+    GSKGCATGTGAAA OPI3 sequence_6 + 1.08e-06
+    TTGACWCYTGCYCWG CHO2 sequence_1 + 7.2e-10
+    TTGACWCYTGCYCWG OPI3 sequence_6 - 2.56e-08
+    TTGACWCYTGCYCWG ACC1 sequence_4 - 1.59e-07
+    TTGACWCYTGCYCWG CHO1 sequence_0 + 2.05e-07
+    TTGACWCYTGCYCWG FAS1 sequence_2 + 3.85e-07
+    TTGACWCYTGCYCWG FAS2 sequence_3 - 5.11e-07
+    TTGACWCYTGCYCWG INO1 sequence_5 + 8.01e-07
+
+    """
+    record = Record()
+    try:
+        xml_tree = ET.parse(handle)
+    except ET.ParseError:
+        raise ValueError(
+            "Improper MEME XML input file. XML root tag should start with >> from Bio import motifs
+    >>> with open("motifs/meme.INO_up800.classic.oops.xml") as f:
+    ...     record = motifs.parse(f, 'MEME')
+    >>> motif = record[0]
+    >>> print(motif.name)
+    GSKGCATGTGAAA
+    >>> motif = record['GSKGCATGTGAAA']
+    >>> print(motif.name)
+    GSKGCATGTGAAA
+    """
+
+    def __init__(self):
+        """Initialize the class."""
+        self.version = ""
+        self.datafile = ""
+        self.command = ""
+        self.alphabet = ""
+        self.sequences = []
+
+    def __getitem__(self, key):
+        """Return the motif of index key."""
+        if isinstance(key, str):
+            for motif in self:
+                if motif.name == key:
+                    return motif
+        else:
+            return list.__getitem__(self, key)
+
+
+# Everything below is private
+
+
+def __read_metadata(record, xml_tree):
+    record.version = xml_tree.getroot().get("version")
+    record.datafile = xml_tree.find("training_set").get("primary_sequences")
+    record.command = xml_tree.find("model").find("command_line").text
+    # TODO - background_frequencies, other metadata under model
+
+
+def __read_alphabet(record, xml_tree):
+    alphabet_tree = (
+        xml_tree.find("training_set").find("letter_frequencies").find("alphabet_array")
+    )
+    for value in alphabet_tree.findall("value"):
+        record.alphabet += value.get("letter_id")
+
+
+def __get_sequence_id_name_map(xml_tree):
+    return {
+        sequence_tree.get("id"): sequence_tree.get("name")
+        for sequence_tree in xml_tree.find("training_set").findall("sequence")
+    }
+
+
+def __read_motifs(record, xml_tree, sequence_id_name_map):
+    for motif_tree in xml_tree.find("motifs").findall("motif"):
+        instances = []
+        for site_tree in motif_tree.find("contributing_sites").findall(
+            "contributing_site"
+        ):
+            letters = [
+                letter_ref.get("letter_id")
+                for letter_ref in site_tree.find("site").findall("letter_ref")
+            ]
+            sequence = "".join(letters)
+            instance = Instance(sequence)
+            instance.motif_name = motif_tree.get("name")
+            instance.sequence_id = site_tree.get("sequence_id")
+            instance.sequence_name = sequence_id_name_map[instance.sequence_id]
+            # TODO - left flank, right flank
+            instance.start = int(site_tree.get("position")) + 1
+            instance.pvalue = float(site_tree.get("pvalue"))
+            instance.strand = __convert_strand(site_tree.get("strand"))
+            instance.length = len(sequence)
+            instances.append(instance)
+        instances = motifs.Instances(instances, record.alphabet)
+        motif = Motif(record.alphabet, instances)
+        motif.id = motif_tree.get("id")
+        motif.name = motif_tree.get("name")
+        motif.alt_id = motif_tree.get("alt")
+        motif.length = int(motif_tree.get("width"))
+        motif.num_occurrences = int(motif_tree.get("sites"))
+        motif.evalue = float(motif_tree.get("e_value"))
+        # TODO - ic, re, llr, pvalue, bayes_threshold, elapsed_time
+        record.append(motif)
+
+
+def __convert_strand(strand):
+    """Convert strand (+/-) from XML if present.
+
+    Default: +
+    """
+    if strand == "minus":
+        return "-"
+    if strand == "plus" or strand == "none":
+        return "+"
diff --git a/code/lib/Bio/motifs/minimal.py b/code/lib/Bio/motifs/minimal.py
new file mode 100644
index 0000000..bdf7e4c
--- /dev/null
+++ b/code/lib/Bio/motifs/minimal.py
@@ -0,0 +1,193 @@
+# Copyright 2018 by Ariel Aptekmann.
+# All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Module for the support of MEME minimal motif format."""
+
+from Bio import motifs
+
+
+def read(handle):
+    """Parse the text output of the MEME program into a meme.Record object.
+
+    Examples
+    --------
+    >>> from Bio.motifs import minimal
+    >>> with open("motifs/meme.out") as f:
+    ...     record = minimal.read(f)
+    ...
+    >>> for motif in record:
+    ...     print(motif.name, motif.evalue)
+    ...
+    1 1.1e-22
+
+    You can access individual motifs in the record by their index or find a motif
+    by its name:
+
+    >>> from Bio import motifs
+    >>> with open("motifs/minimal_test.meme") as f:
+    ...     record = motifs.parse(f, 'minimal')
+    ...
+    >>> motif = record[0]
+    >>> print(motif.name)
+    KRP
+    >>> motif = record['IFXA']
+    >>> print(motif.name)
+    IFXA
+
+    This function won't retrieve instances, as there are none in the minimal MEME format.
+
+    """
+    motif_number = 0
+    record = Record()
+    _read_version(record, handle)
+    _read_alphabet(record, handle)
+    _read_background(record, handle)
+
+    while True:
+        for line in handle:
+            if line.startswith("MOTIF"):
+                break
+        else:
+            return record
+        name = line.split()[1]
+        motif_number += 1
+        length, num_occurrences, evalue = _read_motif_statistics(handle)
+        counts = _read_lpm(handle, num_occurrences)
+        # record.background has the form {'A': 0.25, 'C': 0.25, 'G': 0.25, 'T': 0.25}
+        motif = motifs.Motif(alphabet=record.alphabet, counts=counts)
+        motif.background = record.background
+        motif.length = length
+        motif.num_occurrences = num_occurrences
+        motif.evalue = evalue
+        motif.name = name
+        record.append(motif)
+        assert len(record) == motif_number
+    return record
+
+
+class Record(list):
+    """Class for holding the results of a minimal MEME run."""
+
+    def __init__(self):
+        """Initialize record class values."""
+        self.version = ""
+        self.datafile = ""
+        self.command = ""
+        self.alphabet = None
+        self.background = {}
+        self.sequences = []
+
+    def __getitem__(self, key):
+        """Return the motif of index key."""
+        if isinstance(key, str):
+            for motif in self:
+                if motif.name == key:
+                    return motif
+        else:
+            return list.__getitem__(self, key)
+
+
+# Everything below is private
+
+
+def _read_background(record, handle):
+    """Read background letter frequencies (PRIVATE)."""
+    for line in handle:
+        if line.startswith("Background letter frequencies"):
+            break
+    else:
+        raise ValueError(
+            "Improper input file. File should contain a line starting background frequencies."
+        )
+    try:
+        line = next(handle)
+    except StopIteration:
+        raise ValueError(
+            "Unexpected end of stream: Expected to find line starting background frequencies."
+        )
+    line = line.strip()
+    ls = line.split()
+    A, C, G, T = float(ls[1]), float(ls[3]), float(ls[5]), float(ls[7])
+    record.background = {"A": A, "C": C, "G": G, "T": T}
+
+
+def _read_version(record, handle):
+    """Read MEME version (PRIVATE)."""
+    for line in handle:
+        if line.startswith("MEME version"):
+            break
+    else:
+        raise ValueError(
+            "Improper input file. File should contain a line starting MEME version."
+        )
+    line = line.strip()
+    ls = line.split()
+    record.version = ls[2]
+
+
+def _read_alphabet(record, handle):
+    """Read alphabet (PRIVATE)."""
+    for line in handle:
+        if line.startswith("ALPHABET"):
+            break
+    else:
+        raise ValueError(
+            "Unexpected end of stream: Expected to find line starting with 'ALPHABET'"
+        )
+    if not line.startswith("ALPHABET= "):
+        raise ValueError("Line does not start with 'ALPHABET':\n%s" % line)
+    line = line.strip().replace("ALPHABET= ", "")
+    if line == "ACGT":
+        al = "ACGT"
+    else:
+        al = "ACDEFGHIKLMNPQRSTVWY"
+    record.alphabet = al
+
+
+def _read_lpm(handle, num_occurrences):
+    """Read letter probability matrix (PRIVATE)."""
+    counts = [[], [], [], []]
+    for line in handle:
+        freqs = line.split()
+        if len(freqs) != 4:
+            break
+        counts[0].append(round(float(freqs[0]) * num_occurrences))
+        counts[1].append(round(float(freqs[1]) * num_occurrences))
+        counts[2].append(round(float(freqs[2]) * num_occurrences))
+        counts[3].append(round(float(freqs[3]) * num_occurrences))
+    c = {}
+    c["A"] = counts[0]
+    c["C"] = counts[1]
+    c["G"] = counts[2]
+    c["T"] = counts[3]
+    return c
+
+
+def _read_motif_statistics(handle):
+    """Read motif statistics (PRIVATE)."""
+    # minimal :
+    #      letter-probability matrix: alength= 4 w= 19 nsites= 17 E= 4.1e-009
+    for line in handle:
+        if line.startswith("letter-probability matrix:"):
+            break
+    num_occurrences = int(line.split("nsites=")[1].split()[0])
+    length = int(line.split("w=")[1].split()[0])
+    evalue = float(line.split("E=")[1].split()[0])
+    return length, num_occurrences, evalue
+
+
+def _read_motif_name(handle):
+    """Read motif name (PRIVATE)."""
+    for line in handle:
+        if "sorted by position p-value" in line:
+            break
+    else:
+        raise ValueError("Unexpected end of stream: Failed to find motif name")
+    line = line.strip()
+    words = line.split()
+    name = " ".join(words[0:2])
+    return name
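As a self-contained check of the minimal-format reader above, a tiny hand-written file can be parsed from memory (the motif content is invented):

```python
from io import StringIO
from Bio.motifs import minimal

# A minimal MEME file with one two-column motif, written inline.
text = """\
MEME version 4

ALPHABET= ACGT

Background letter frequencies
A 0.25 C 0.25 G 0.25 T 0.25

MOTIF demo
letter-probability matrix: alength= 4 w= 2 nsites= 4 E= 1.0e-003
 0.25 0.25 0.25 0.25
 1.00 0.00 0.00 0.00
"""
record = minimal.read(StringIO(text))
print(record.version, record[0].name, record[0].num_occurrences)  # 4 demo 4
```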
diff --git a/code/lib/Bio/motifs/pfm.py b/code/lib/Bio/motifs/pfm.py
new file mode 100644
index 0000000..588f089
--- /dev/null
+++ b/code/lib/Bio/motifs/pfm.py
@@ -0,0 +1,413 @@
+# Copyright 2015 by Gert Hulselmans.  All rights reserved.
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+
+"""Parse various position frequency matrix format files."""
+
+import re
+
+from Bio import motifs
+
+
+class Record(list):
+    """Class to store the information in a position frequency matrix table.
+
+    The record inherits from a list containing the individual motifs.
+    """
+
+    def __str__(self):
+        return "\n".join(str(motif) for motif in self)
+
+
+def read(handle, pfm_format):
+    """Read motif(s) from a file in various position frequency matrix formats.
+
+    Return the record of PFM(s).
+    Call the appropriate routine based on the format passed.
+    """
+    # Supporting underscores here for backward compatibility
+    pfm_format = pfm_format.lower().replace("_", "-")
+    if pfm_format == "pfm-four-columns":
+        record = _read_pfm_four_columns(handle)
+        return record
+    elif pfm_format == "pfm-four-rows":
+        record = _read_pfm_four_rows(handle)
+        return record
+    else:
+        raise ValueError("Unknown Position Frequency matrix format '%s'" % pfm_format)
+
+
+def _read_pfm_four_columns(handle):
+    """Read motifs in position frequency matrix format (4 columns) from a file handle.
+
+    # cisbp
+    Pos A   C   G   T
+    1   0.00961538461538462 0.00961538461538462 0.00961538461538462 0.971153846153846
+    2   0.00961538461538462 0.00961538461538462 0.00961538461538462 0.971153846153846
+    3   0.971153846153846   0.00961538461538462 0.00961538461538462 0.00961538461538462
+    4   0.00961538461538462 0.00961538461538462 0.00961538461538462 0.971153846153846
+    5   0.00961538461538462 0.971153846153846   0.00961538461538462 0.00961538461538462
+    6   0.971153846153846   0.00961538461538462 0.00961538461538462 0.00961538461538462
+    7   0.00961538461538462 0.971153846153846   0.00961538461538462 0.00961538461538462
+    8   0.00961538461538462 0.00961538461538462 0.00961538461538462 0.971153846153846
+
+    # c2h2 zfs
+    Gene    ENSG00000197372
+    Pos A   C   G   T
+    1   0.341303    0.132427    0.117054    0.409215
+    2   0.283785    0.077066    0.364552    0.274597
+    3   0.491055    0.078208    0.310520    0.120217
+    4   0.492621    0.076117    0.131007    0.300256
+    5   0.250645    0.361464    0.176504    0.211387
+    6   0.276694    0.498070    0.197793    0.027444
+    7   0.056317    0.014631    0.926202    0.002850
+    8   0.004470    0.007769    0.983797    0.003964
+    9   0.936213    0.058787    0.002387    0.002613
+    10  0.004352    0.004030    0.002418    0.989200
+    11  0.013277    0.008165    0.001991    0.976567
+    12  0.968132    0.002263    0.002868    0.026737
+    13  0.397623    0.052017    0.350783    0.199577
+    14  0.000000    0.000000    1.000000    0.000000
+    15  1.000000    0.000000    0.000000    0.000000
+    16  0.000000    0.000000    1.000000    0.000000
+    17  0.000000    0.000000    1.000000    0.000000
+    18  1.000000    0.000000    0.000000    0.000000
+    19  0.000000    1.000000    0.000000    0.000000
+    20  1.000000    0.000000    0.000000    0.000000
+
+    # c2h2 zfs
+    Gene    FBgn0000210
+    Motif   M1734_0.90
+    Pos A   C   G   T
+    1   0.25    0.0833333   0.0833333   0.583333
+    2   0.75    0.166667    0.0833333   0
+    3   0.833333    0   0   0.166667
+    4   1   0   0   0
+    5   0   0.833333    0.0833333   0.0833333
+    6   0.333333    0   0   0.666667
+    7   0.833333    0   0   0.166667
+    8   0.5 0   0.333333    0.166667
+    9   0.5 0.0833333   0.166667    0.25
+    10  0.333333    0.25    0.166667    0.25
+    11  0.166667    0.25    0.416667    0.166667
+
+    # flyfactorsurvey (cluster buster)
+    >AbdA_Cell_FBgn0000014
+    1   3   0   14
+    0   0   0   18
+    16  0   0   2
+    18  0   0   0
+    1   0   0   17
+    0   0   6   12
+    15  1   2   0
+
+    # homer
+    >ATGACTCATC AP-1(bZIP)/ThioMac-PU.1-ChIP-Seq(GSE21512)/Homer    6.049537    -1.782996e+03   0   9805.3,5781.0,3085.1,2715.0,0.00e+00
+    0.419   0.275   0.277   0.028
+    0.001   0.001   0.001   0.997
+    0.010   0.002   0.965   0.023
+    0.984   0.003   0.001   0.012
+    0.062   0.579   0.305   0.054
+    0.026   0.001   0.001   0.972
+    0.043   0.943   0.001   0.012
+    0.980   0.005   0.001   0.014
+    0.050   0.172   0.307   0.471
+    0.149   0.444   0.211   0.195
+
+    # hocomoco
+    > AHR_si
+    40.51343240527031  18.259112547756697  56.41253757072521  38.77363485291994
+    10.877470982533044  11.870876719950774  34.66312982331297  96.54723985087516
+    21.7165707818416  43.883079837598544  20.706746561638717  67.6523201955933
+    2.5465132509466635  1.3171620263517245  145.8637051322628  4.231336967110781
+    0.0  150.35847450464382  1.4927836298652875  2.1074592421627525
+    3.441039751299748  0.7902972158110341  149.37613720253387  0.3512432070271259
+    0.0  3.441039751299748  0.7024864140542533  149.81519121131782
+    0.0  0.0  153.95871737667187  0.0
+    43.07922333291745  66.87558226865211  16.159862546986584  27.844049228115868
+
+    # neph
+    UW.Motif.0001   atgactca
+    0.772949    0.089579    0.098612    0.038860
+    0.026652    0.004653    0.025056    0.943639
+    0.017663    0.023344    0.918728    0.040264
+    0.919596    0.025414    0.029759    0.025231
+    0.060312    0.772259    0.104968    0.062462
+    0.037406    0.020643    0.006667    0.935284
+    0.047316    0.899024    0.026928    0.026732
+    0.948639    0.019497    0.005737    0.026128
+
+    # tiffin
+    T   A   G   C
+    30  0   28  40
+    0   0   0   99
+    0   55  14  29
+    0   99  0   0
+    20  78  0   0
+    0   52  7   39
+    19  46  11  22
+    0   60  38  0
+    0   33  0   66
+    73  0   25  0
+    99  0   0   0
+    """
+    record = Record()
+
+    motif_name = None
+    motif_nbr = 0
+    motif_nbr_added = 0
+
+    default_nucleotide_order = ["A", "C", "G", "T"]
+    nucleotide_order = default_nucleotide_order
+    nucleotide_counts = {"A": [], "C": [], "G": [], "T": []}
+
+    for line in handle:
+        line = line.strip()
+
+        if line:
+            columns = line.split()
+            nbr_columns = len(columns)
+
+            if line.startswith("#"):
+                # Skip comment lines.
+                continue
+            elif line.startswith(">"):
+                # Parse ">AbdA_Cell_FBgn0000014" and "> AHR_si" like lines and put the part after ">" as motif name.
+                if motif_nbr != 0 and motif_nbr_added != motif_nbr:
+                    # Add the previous motif to the record.
+                    motif = motifs.Motif(alphabet="GATC", counts=nucleotide_counts)
+                    motif.name = motif_name
+                    record.append(motif)
+                    motif_nbr_added = motif_nbr
+
+                # Reinitialize variables for the new motif.
+                motif_name = line[1:].strip()
+                nucleotide_order = default_nucleotide_order
+            elif columns[0] == "Gene":
+                # Parse "Gene   ENSG00000197372" like lines and put the gene name as motif name.
+                if motif_nbr != 0 and motif_nbr_added != motif_nbr:
+                    # Add the previous motif to the record.
+                    motif = motifs.Motif(alphabet="GATC", counts=nucleotide_counts)
+                    motif.name = motif_name
+                    record.append(motif)
+                    motif_nbr_added = motif_nbr
+
+                # Reinitialize variables for the new motif.
+                motif_name = columns[1]
+                nucleotide_order = default_nucleotide_order
+            elif columns[0] == "Motif":
+                # Parse "Motif  M1734_0.90" like lines.
+                if motif_nbr != 0 and motif_nbr_added != motif_nbr:
+                    # Add the previous motif to the record.
+                    motif = motifs.Motif(alphabet="GATC", counts=nucleotide_counts)
+                    motif.name = motif_name
+                    record.append(motif)
+                    motif_nbr_added = motif_nbr
+
+                # Reinitialize variables for the new motif.
+                motif_name = columns[1]
+                nucleotide_order = default_nucleotide_order
+            elif columns[0] == "Pos":
+                # Parse "Pos    A   C   G   T" like lines and change nucleotide order if necessary.
+                if nbr_columns == 5:
+                    # If the previous line was not a "Gene  ENSG00000197372" like line, a new motif starts here.
+                    if motif_nbr != 0 and motif_nbr_added != motif_nbr:
+                        # Add the previous motif to the record.
+                        motif = motifs.Motif(alphabet="GATC", counts=nucleotide_counts)
+                        motif.name = motif_name
+                        record.append(motif)
+                        motif_nbr_added = motif_nbr
+
+                    nucleotide_order = default_nucleotide_order
+
+                    if set(columns[1:]) == set(default_nucleotide_order):
+                        nucleotide_order = columns[1:]
+            elif columns[0] in default_nucleotide_order:
+                # Parse "A  C   G   T" like lines and change nucleotide order if necessary.
+                if nbr_columns == 4:
+                    nucleotide_order = default_nucleotide_order
+                    if set(columns) == set(default_nucleotide_order):
+                        nucleotide_order = columns
+            else:
+                # Parse matrix columns lines and use the correct nucleotide order.
+                if nbr_columns == 4:
+                    matrix_columns = columns
+                elif nbr_columns == 5:
+                    matrix_columns = columns[1:]
+                else:
+                    continue
+
+                if motif_nbr == motif_nbr_added:
+                    # A new motif matrix starts here, so reinitialize variables for the new motif.
+                    nucleotide_counts = {"A": [], "C": [], "G": [], "T": []}
+                    motif_nbr += 1
+
+                # Append the counts following the current nucleotide order.
+                for nucleotide, nucleotide_count in zip(
+                    nucleotide_order, matrix_columns
+                ):
+                    nucleotide_counts[nucleotide].append(float(nucleotide_count))
+        else:
+            # Empty lines can be separators between motifs.
+            if motif_nbr != 0 and motif_nbr_added != motif_nbr:
+                # Add the previous motif to the record.
+                motif = motifs.Motif(alphabet="GATC", counts=nucleotide_counts)
+                motif.name = motif_name
+                record.append(motif)
+                motif_nbr_added = motif_nbr
+
+            # Reinitialize variables for the new motif.
+            motif_name = None
+            nucleotide_order = default_nucleotide_order
+            # nucleotide_counts = {'A': [], 'C': [], 'G': [], 'T': []}
+
+    if motif_nbr != 0 and motif_nbr_added != motif_nbr:
+        motif = motifs.Motif(alphabet="GATC", counts=nucleotide_counts)
+        motif.name = motif_name
+        record.append(motif)
+
+    return record
+
+
+def _read_pfm_four_rows(handle):
+    """Read motifs in position frequency matrix format (4 rows) from a file handle.
+
+    # hdpi
+    A   0   5   6   5   1   0
+    C   1   1   0   0   0   4
+    G   5   0   0   0   3   0
+    T   0   0   0   1   2   2
+
+    # yetfasco
+    A   0.5 0.0 0.0 0.25    0.25    0.25    0.25    0.25    0.25    0.25    0.25    0.25    0.5 0.0 0.0833333334583333
+    T   0.0 0.0 0.0 0.25    0.25    0.25    0.25    0.25    0.25    0.25    0.25    0.25    0.0 0.0 0.0833333334583333
+    G   0.0 1.0 0.0 0.25    0.25    0.25    0.25    0.25    0.25    0.25    0.25    0.25    0.0 1.0 0.249999999875
+    C   0.5 0.0 1.0 0.25    0.25    0.25    0.25    0.25    0.25    0.25    0.25    0.25    0.5 0.0 0.583333333208333
+
+    # flyfactorsurvey ZFP finger
+    A |     92    106    231    135      0      1    780     28      0    700    739     94     60    127    130
+    C |    138     82    129     81    774      1      3      1      0      6     17     49    193    122    148
+    G |    270    398     54    164      7    659      1    750    755     65      1     41    202    234    205
+    T |    290    204    375    411      9    127      6     11     36     20     31    605    335    307    308
+
+    # scertf pcm
+    A | 9 1 1 97 1 94
+    T | 80 1 97 1 1 2
+    C | 9 97 1 1 1 2
+    G | 2 1 1 1 97 2
+
+    # scertf pfm
+    A | 0.090 0.010 0.010 0.970 0.010 0.940
+    C | 0.090 0.970 0.010 0.010 0.010 0.020
+    G | 0.020 0.010 0.010 0.010 0.970 0.020
+    T | 0.800 0.010 0.970 0.010 0.010 0.020
+
+    # idmmpmm
+    > abd-A
+    0.218451749734889 0.0230646871686108 0.656680805938494 0.898197242841994 0.040694591728526 0.132953340402969 0.74907211028632 0.628313891834571
+    0.0896076352067868 0.317338282078473 0.321580063626723 0.0461293743372216 0.0502386002120891 0.040694591728526 0.0284994697773065 0.0339342523860021
+    0.455991516436904 0.0691940615058324 0.0108695652173913 0.0217391304347826 0.0284994697773065 0.0284994697773065 0.016304347826087 0.160127253446448
+    0.235949098621421 0.590402969247084 0.0108695652173913 0.0339342523860021 0.880567338282079 0.797852598091198 0.206124072110286 0.17762460233298
+
+    # JASPAR
+        >MA0001.1 AGL3
+        A  [ 0  3 79 40 66 48 65 11 65  0 ]
+        C  [94 75  4  3  1  2  5  2  3  3 ]
+        G  [ 1  0  3  4  1  0  5  3 28 88 ]
+        T  [ 2 19 11 50 29 47 22 81  1  6 ]
+
+    or::
+
+        >MA0001.1 AGL3
+        0  3 79 40 66 48 65 11 65  0
+        94 75  4  3  1  2  5  2  3  3
+        1  0  3  4  1  0  5  3 28 88
+        2 19 11 50 29 47 22 81  1  6
+    """
+    record = Record()
+
+    name_pattern = re.compile(r"^>\s*(.+)\s*")
+    row_pattern_with_nucleotide_letter = re.compile(
+        r"\s*([ACGT])\s*[\[|]*\s*([0-9.\-eE\s]+)\s*\]*\s*"
+    )
+    row_pattern_without_nucleotide_letter = re.compile(r"\s*([0-9.\-eE\s]+)\s*")
+
+    motif_name = None
+    nucleotide_counts = {}
+    row_count = 0
+    nucleotides = ["A", "C", "G", "T"]
+
+    for line in handle:
+        line = line.strip()
+
+        name_match = name_pattern.match(line)
+        row_match_with_nucleotide_letter = row_pattern_with_nucleotide_letter.match(
+            line
+        )
+        row_match_without_nucleotide_letter = row_pattern_without_nucleotide_letter.match(
+            line
+        )
+
+        if name_match:
+            motif_name = name_match.group(1)
+        elif row_match_with_nucleotide_letter:
+            (nucleotide, counts_str) = row_match_with_nucleotide_letter.group(1, 2)
+            current_nucleotide_counts = counts_str.split()
+            nucleotide_counts[nucleotide] = [
+                float(current_nucleotide_count)
+                for current_nucleotide_count in current_nucleotide_counts
+            ]
+            row_count += 1
+            if row_count == 4:
+                motif = motifs.Motif(alphabet="GATC", counts=nucleotide_counts)
+
+                if motif_name:
+                    motif.name = motif_name
+
+                record.append(motif)
+
+                motif_name = None
+                nucleotide_counts = {}
+                row_count = 0
+        elif row_match_without_nucleotide_letter:
+            current_nucleotide_counts = row_match_without_nucleotide_letter.group(
+                1
+            ).split()
+            nucleotide_counts[nucleotides[row_count]] = [
+                float(current_nucleotide_count)
+                for current_nucleotide_count in current_nucleotide_counts
+            ]
+            row_count += 1
+            if row_count == 4:
+                motif = motifs.Motif(alphabet="GATC", counts=nucleotide_counts)
+
+                if motif_name:
+                    motif.name = motif_name
+
+                record.append(motif)
+
+                motif_name = None
+                nucleotide_counts = {}
+                row_count = 0
+
+    return record
+
+
+def write(motifs):
+    """Return the representation of motifs in Cluster Buster position frequency matrix format."""
+    lines = []
+    for m in motifs:
+        line = f">{m.name}\n"
+        lines.append(line)
+        for ACGT_counts in zip(
+            m.counts["A"], m.counts["C"], m.counts["G"], m.counts["T"]
+        ):
+            lines.append("{:0.0f}\t{:0.0f}\t{:0.0f}\t{:0.0f}\n".format(*ACGT_counts))
+
+    # Finished; glue the lines together.
+    text = "".join(lines)
+
+    return text
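A quick sketch of the four-rows reader and the Cluster Buster writer defined above, run on an in-memory scertf-style matrix (the motif is invented):

```python
from io import StringIO
from Bio.motifs import pfm

data = """\
>demo
A | 9 1 1 97 1 94
T | 80 1 97 1 1 2
C | 9 97 1 1 1 2
G | 2 1 1 1 97 2
"""
record = pfm.read(StringIO(data), "pfm-four-rows")
print(len(record), record[0].name, record[0].length)  # 1 demo 6
print(pfm.write(record))  # round-trip into Cluster Buster format
```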
diff --git a/code/lib/Bio/motifs/thresholds.py b/code/lib/Bio/motifs/thresholds.py
new file mode 100644
index 0000000..196b0a3
--- /dev/null
+++ b/code/lib/Bio/motifs/thresholds.py
@@ -0,0 +1,109 @@
+# Copyright 2008 by Norbert Dojer.  All rights reserved.
+# Adapted by Bartek Wilczynski.
+# This code is part of the Biopython distribution and governed by its
+# license.  Please see the LICENSE file that should have been included
+# as part of this package.
+"""Approximate calculation of appropriate thresholds for motif finding."""
+
+
+class ScoreDistribution:
+    """Class representing approximate score distribution for a given motif.
+
+    Utilizes a dynamic programming approach to calculate the distribution of
+    scores with a predefined precision. Provides a number of methods for calculating
+    thresholds for motif occurrences.
+    """
+
+    def __init__(self, motif=None, precision=10 ** 3, pssm=None, background=None):
+        """Initialize the class."""
+        if pssm is None:
+            self.min_score = min(0.0, motif.min_score())
+            self.interval = max(0.0, motif.max_score()) - self.min_score
+            self.n_points = precision * motif.length
+            self.ic = motif.ic()
+        else:
+            self.min_score = min(0.0, pssm.min)
+            self.interval = max(0.0, pssm.max) - self.min_score
+            self.n_points = precision * pssm.length
+            self.ic = pssm.mean(background)
+        self.step = self.interval / (self.n_points - 1)
+        self.mo_density = [0.0] * self.n_points
+        self.mo_density[-self._index_diff(self.min_score)] = 1.0
+        self.bg_density = [0.0] * self.n_points
+        self.bg_density[-self._index_diff(self.min_score)] = 1.0
+        if pssm is None:
+            for lo, mo in zip(motif.log_odds(), motif.pwm()):
+                self.modify(lo, mo, motif.background)
+        else:
+            for position in range(pssm.length):
+                mo_new = [0.0] * self.n_points
+                bg_new = [0.0] * self.n_points
+                lo = pssm[:, position]
+                for letter, score in lo.items():
+                    bg = background[letter]
+                    mo = pow(2, pssm[letter, position]) * bg
+                    d = self._index_diff(score)
+                    for i in range(self.n_points):
+                        mo_new[self._add(i, d)] += self.mo_density[i] * mo
+                        bg_new[self._add(i, d)] += self.bg_density[i] * bg
+                self.mo_density = mo_new
+                self.bg_density = bg_new
+
+    def _index_diff(self, x, y=0.0):
+        return int((x - y + 0.5 * self.step) // self.step)
+
+    def _add(self, i, j):
+        return max(0, min(self.n_points - 1, i + j))
+
+    def modify(self, scores, mo_probs, bg_probs):
+        """Modify motifs and background density."""
+        mo_new = [0.0] * self.n_points
+        bg_new = [0.0] * self.n_points
+        for k, v in scores.items():
+            d = self._index_diff(v)
+            for i in range(self.n_points):
+                mo_new[self._add(i, d)] += self.mo_density[i] * mo_probs[k]
+                bg_new[self._add(i, d)] += self.bg_density[i] * bg_probs[k]
+        self.mo_density = mo_new
+        self.bg_density = bg_new
+
+    def threshold_fpr(self, fpr):
+        """Approximate the log-odds threshold which makes the type I error (false positive rate)."""
+        i = self.n_points
+        prob = 0.0
+        while prob < fpr:
+            i -= 1
+            prob += self.bg_density[i]
+        return self.min_score + i * self.step
+
+    def threshold_fnr(self, fnr):
+        """Approximate the log-odds threshold which makes the type II error (false negative rate)."""
+        i = -1
+        prob = 0.0
+        while prob < fnr:
+            i += 1
+            prob += self.mo_density[i]
+        return self.min_score + i * self.step
+
+    def threshold_balanced(self, rate_proportion=1.0, return_rate=False):
+        """Approximate log-odds threshold making FNR equal to FPR times rate_proportion."""
+        i = self.n_points
+        fpr = 0.0
+        fnr = 1.0
+        while fpr * rate_proportion < fnr:
+            i -= 1
+            fpr += self.bg_density[i]
+            fnr -= self.mo_density[i]
+        if return_rate:
+            return self.min_score + i * self.step, fpr
+        else:
+            return self.min_score + i * self.step
+
+    def threshold_patser(self):
+        """Threshold selection mimicking the behaviour of patser (Hertz, Stormo 1999) software.
+
+        It selects a threshold such that log2(fpr) = -ic(M).
+        Note: the actual patser software uses natural logarithms instead of
+        log_2, so the numbers are not directly comparable.
+        """
+        return self.threshold_fpr(fpr=2 ** -self.ic)
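The distribution machinery above is typically driven from a PSSM; a brief sketch of selecting score cutoffs (the motif instances are invented):

```python
from Bio import motifs

m = motifs.create(["TACAA", "TACGC", "TACAC", "TACCC", "AACCC", "AATGC", "AATGC"])
m.pseudocounts = 0.5  # avoid -inf log-odds scores
background = {"A": 0.3, "C": 0.2, "G": 0.2, "T": 0.3}
m.background = background

distribution = m.pssm.distribution(background=background, precision=10 ** 3)
print("cutoff at 1% FPR:", distribution.threshold_fpr(0.01))
print("patser-style cutoff:", distribution.threshold_patser())
```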
diff --git a/code/lib/Bio/motifs/transfac.py b/code/lib/Bio/motifs/transfac.py
new file mode 100644
index 0000000..927cd2b
--- /dev/null
+++ b/code/lib/Bio/motifs/transfac.py
@@ -0,0 +1,325 @@
+# Copyright 2003 by Bartek Wilczynski.  All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+
+"""Parsing TRANSFAC files."""
+
+
+from Bio import motifs
+
+
+class Motif(motifs.Motif, dict):
+    """Store the information for one TRANSFAC motif.
+
+    This class inherits from the Bio.motifs.Motif base class, as well
+    as from a Python dictionary. All motif information found by the parser
+    is stored as attributes of the base class when possible; see the
+    Bio.motifs.Motif base class for a description of these attributes. All
+    other information associated with the motif is stored as (key, value)
+    pairs in the dictionary, where the key is the two-letter fields as found
+    in the TRANSFAC file. References are an exception: These are stored in
+    the .references attribute.
+
+    These fields are commonly found in TRANSFAC files::
+
+        AC:    Accession number
+        AS:    Accession numbers, secondary
+        BA:    Statistical basis
+        BF:    Binding factors
+        BS:    Factor binding sites underlying the matrix
+               [sequence; SITE accession number; start position for matrix
+               sequence; length of sequence used; number of gaps inserted;
+               strand orientation.]
+        CC:    Comments
+        CO:    Copyright notice
+        DE:    Short factor description
+        DR:    External databases
+               [database name: database accession number]
+        DT:    Date created/updated
+        HC:    Subfamilies
+        HP:    Superfamilies
+        ID:    Identifier
+        NA:    Name of the binding factor
+        OC:    Taxonomic classification
+        OS:    Species/Taxon
+        OV:    Older version
+        PV:    Preferred version
+        TY:    Type
+        XX:    Empty line; these are not stored in the Record.
+
+    References are stored in an .references attribute, which is a list of
+    dictionaries with the following keys::
+
+        RN:    Reference number
+        RA:    Reference authors
+        RL:    Reference data
+        RT:    Reference title
+        RX:    PubMed ID
+
+    For more information, see the TRANSFAC documentation.
+    """
+
+    multiple_value_keys = {"BF", "OV", "HP", "BS", "HC", "DT", "DR"}
+    # These keys can occur multiple times for one motif
+
+    reference_keys = {"RX", "RA", "RT", "RL"}
+    # These keys occur for references
+
+
+class Record(list):
+    """Store the information in a TRANSFAC matrix table.
+
+    The record inherits from a list containing the individual motifs.
+
+    Attributes:
+     - version - The version number, corresponding to the 'VV' field
+       in the TRANSFAC file;
+
+    """
+
+    def __init__(self):
+        """Initialize the class."""
+        self.version = None
+
+    def __str__(self):
+        """Turn the TRANSFAC matrix into a string."""
+        return write(self)
+
+
+def read(handle, strict=True):
+    """Parse a transfac format handle into a Record object."""
+    annotations = {}
+    references = []
+    counts = None
+    record = Record()
+    for line in handle:
+        line = line.strip()
+        if not line:
+            continue
+        key_value = line.split(None, 1)
+        key = key_value[0].strip()
+        if strict:
+            if len(key) != 2:
+                raise ValueError(
+                    "The key value of a TRANSFAC motif line should have 2 characters:"
+                    f'"{line}"'
+                )
+        if len(key_value) == 2:
+            value = key_value[1].strip()
+            if strict:
+                if not line.partition("  ")[1]:
+                    raise ValueError(
+                        "A TRANSFAC motif line should have 2 "
+                        "spaces between key and value columns: "
+                        f'"{line}"'
+                    )
+        if key == "VV":
+            record.version = value
+        elif key in ("P0", "PO"):  # Old TRANSFAC files use PO instead of P0
+            counts = {}
+            if value.split()[:4] != ["A", "C", "G", "T"]:
+                raise ValueError(
+                    f'A TRANSFAC matrix "{key}" line should be '
+                    f'followed by "A C G T": {line}'
+                )
+            length = 0
+            for c in "ACGT":
+                counts[c] = []
+            for line in handle:
+                line = line.strip()
+                key_value = line.split(None, 1)
+                key = key_value[0].strip()
+                if len(key_value) == 2:
+                    value = key_value[1].strip()
+                    if strict:
+                        if not line.partition("  ")[1]:
+                            raise ValueError(
+                                "A TRANSFAC motif line should have 2 spaces"
+                                f' between key and value columns: "{line}"'
+                            )
+                try:
+                    i = int(key)
+                except ValueError:
+                    break
+                if length == 0 and i == 0:
+                    if strict:
+                        raise ValueError(
+                            'A TRANSFAC matrix should start with "01" as first row'
+                            f' of the matrix, but this matrix uses "00": "{line}'
+                        )
+                else:
+                    length += 1
+                if i != length:
+                    raise ValueError(
+                        "The TRANSFAC matrix row number does not match the position"
+                        f' in the matrix: "{line}"'
+                    )
+                if strict:
+                    if len(key) == 1:
+                        raise ValueError(
+                            "A TRANSFAC matrix line should have a 2 digit"
+                            f' key at the start of the line ("{i:02d}"),'
+                            f' but this matrix uses "{i:d}": "{line:s}".'
+                        )
+                    if len(key_value) != 2:
+                        raise ValueError(
+                            "A TRANSFAC matrix line should have a key and a"
+                            f' value: "{line}"'
+                        )
+                values = value.split()[:4]
+                if len(values) != 4:
+                    raise ValueError(
+                        "A TRANSFAC matrix line should have a value for each"
+                        f' nucleotide (A, C, G and T): "{line}"'
+                    )
+                for c, v in zip("ACGT", values):
+                    counts[c].append(float(v))
+        if line == "XX":
+            pass
+        elif key == "RN":
+            index, separator, accession = value.partition(";")
+            if index[0] != "[":
+                raise ValueError(
+                    f'The index "{index}" in a TRANSFAC RN line should start'
+                    f' with a "[": "{line}"'
+                )
+            if index[-1] != "]":
+                raise ValueError(
+                    f'The index "{index}" in a TRANSFAC RN line should end'
+                    f' with a "]": "{line}"'
+                )
+            index = int(index[1:-1])
+            if len(references) != index - 1:
+                raise ValueError(
+                    f'The index "{index:d}" of the TRANSFAC RN line does not '
+                    "match the current number of seen references "
+                    f'"{len(references) + 1:d}": "{line:s}"'
+                )
+            reference = {key: value}
+            references.append(reference)
+        elif key == "//":
+            if counts is not None:
+                motif = Motif(alphabet="ACGT", counts=counts)
+                motif.update(annotations)
+                motif.references = references
+                record.append(motif)
+            annotations = {}
+            references = []
+        elif key in Motif.reference_keys:
+            reference[key] = value
+        elif key in Motif.multiple_value_keys:
+            if key not in annotations:
+                annotations[key] = []
+            annotations[key].append(value)
+        else:
+            annotations[key] = value
+    return record
+
+
+def write(motifs):
+    """Write the representation of a motif in TRANSFAC format."""
+    blocks = []
+    try:
+        version = motifs.version
+    except AttributeError:
+        pass
+    else:
+        if version is not None:
+            block = (
+                """\
+VV  %s
+XX
+//
+"""
+                % version
+            )
+            blocks.append(block)
+    multiple_value_keys = Motif.multiple_value_keys
+    sections = (
+        ("AC", "AS"),  # Accession
+        ("ID",),  # ID
+        ("DT", "CO"),  # Date, copyright
+        ("NA",),  # Name
+        ("DE",),  # Short factor description
+        ("TY",),  # Type
+        ("OS", "OC"),  # Organism
+        ("HP", "HC"),  # Superfamilies, subfamilies
+        ("BF",),  # Binding factors
+        ("P0",),  # Frequency matrix
+        ("BA",),  # Statistical basis
+        ("BS",),  # Factor binding sites
+        ("CC",),  # Comments
+        ("DR",),  # External databases
+        ("OV", "PV"),  # Versions
+    )
+    for motif in motifs:
+        lines = []
+        for section in sections:
+            blank = False
+            for key in section:
+                if key == "P0":
+                    # Frequency matrix
+                    length = motif.length
+                    if length == 0:
+                        continue
+                    sequence = motif.degenerate_consensus
+                    letters = sorted(motif.alphabet)
+                    line = "      ".join(["P0"] + letters)
+
+                    lines.append(line)
+                    for i in range(length):
+                        line = (
+                            " ".join(["%02.d"] + ["%6.20g" for _ in letters])
+                            + "      %s"
+                        )
+                        line = line % tuple(
+                            [i + 1]
+                            + [motif.counts[l][i] for l in letters]
+                            + [sequence[i]]
+                        )
+                        lines.append(line)
+                    blank = True
+                else:
+                    try:
+                        value = motif.get(key)
+                    except AttributeError:
+                        value = None
+                    if value is not None:
+                        if key in multiple_value_keys:
+                            for v in value:
+                                line = "%s  %s" % (key, v)
+                                lines.append(line)
+                        else:
+                            line = "%s  %s" % (key, value)
+                            lines.append(line)
+                        blank = True
+                if key == "PV":
+                    # References
+                    try:
+                        references = motif.references
+                    except AttributeError:
+                        pass
+                    else:
+                        keys = ("RN", "RX", "RA", "RT", "RL")
+                        for reference in references:
+                            for key in keys:
+                                value = reference.get(key)
+                                if value is None:
+                                    continue
+                                line = "%s  %s" % (key, value)
+                                lines.append(line)
+                                blank = True
+            if blank:
+                line = "XX"
+                lines.append(line)
+        # Finished this motif; glue the lines together
+        line = "//"
+        lines.append(line)
+        block = "\n".join(lines) + "\n"
+        blocks.append(block)
+    # Finished all motifs; glue the blocks together
+    text = "".join(blocks)
+    return text
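To see the strict TRANSFAC parser above in action, a small record can be fed from memory (the matrix values are invented):

```python
from io import StringIO
from Bio.motifs import transfac

sample = """\
ID  motif1
P0      A      C      G      T
01      1      2      2      0      S
02      2      1      2      0      R
03      3      0      1      1      A
XX
//
"""
record = transfac.read(StringIO(sample))
motif = record[0]
print(motif["ID"], motif.length)  # motif1 3
print(transfac.write(record))     # serialize back to TRANSFAC format
```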
diff --git a/code/lib/Bio/motifs/xms.py b/code/lib/Bio/motifs/xms.py
new file mode 100644
index 0000000..46643c1
--- /dev/null
+++ b/code/lib/Bio/motifs/xms.py
@@ -0,0 +1,105 @@
+# Copyright 2015 by Gert Hulselmans.  All rights reserved.
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+
+"""Parse XMS motif files."""
+
+from Bio import motifs
+
+
+from xml.dom import minidom, Node
+import re
+
+
+class XMSScanner:
+    """Class for scanning XMS XML file."""
+
+    def __init__(self, doc):
+        """Generate motif Record from xms document, an XML-like motif pfm file."""
+        self.record = Record()
+        for child in doc.getElementsByTagName("motif"):
+            if child.nodeType == Node.ELEMENT_NODE:
+                self.handle_motif(child)
+
+    def handle_motif(self, node):
+        """Read the motif's name and column from the node and add the motif record."""
+        motif_name = self.get_text(node.getElementsByTagName("name"))
+        nucleotide_counts = {"A": [], "C": [], "G": [], "T": []}
+
+        for column in node.getElementsByTagName("column"):
+            # Append each column's A/C/G/T weights to the running counts.
+            for nucleotide, nucleotide_count in zip(
+                ["A", "C", "G", "T"], self.get_acgt(column)
+            ):
+                nucleotide_counts[nucleotide].append(float(nucleotide_count))
+
+        motif = motifs.Motif(alphabet="GATC", counts=nucleotide_counts)
+        motif.name = motif_name
+
+        self.record.append(motif)
+
+    def get_property_value(self, node, key_name):
+        """Extract the value of the motif's property named key_name from node."""
+        for cur_property in node.getElementsByTagName("prop"):
+            right_property = False
+            cur_value = None
+            for child in cur_property.childNodes:
+                if child.nodeType != Node.ELEMENT_NODE:
+                    continue
+                if child.tagName == "key" and self.get_text([child]) == key_name:
+                    right_property = True
+                if child.tagName == "value":
+                    cur_value = self.get_text([child])
+            if right_property:
+                return cur_value
+        return None
+
+    def get_acgt(self, node):
+        """Get and return the motif's weights of A, C, G, T."""
+        a, c, g, t = 0.0, 0.0, 0.0, 0.0
+        for weight in node.getElementsByTagName("weight"):
+            if weight.getAttribute("symbol") == "adenine":
+                a = float(self.get_text([weight]))
+            elif weight.getAttribute("symbol") == "cytosine":
+                c = float(self.get_text([weight]))
+            elif weight.getAttribute("symbol") == "guanine":
+                g = float(self.get_text([weight]))
+            elif weight.getAttribute("symbol") == "thymine":
+                t = float(self.get_text([weight]))
+        return a, c, g, t
+
+    def get_text(self, nodelist):
+        """Return a string representation of the motif's properties listed on nodelist ."""
+        retlist = []
+        for node in nodelist:
+            if node.nodeType == Node.TEXT_NODE:
+                retlist.append(node.wholeText)
+            elif node.hasChildNodes():
+                retlist.append(self.get_text(node.childNodes))
+
+        return re.sub(r"\s+", " ", "".join(retlist))
+
+
+class Record(list):
+    """Class to store the information in a XMS matrix table.
+
+    The record inherits from a list containing the individual motifs.
+    """
+
+    def __str__(self):
+        return "\n".join(str(motif) for motif in self)
+
+
+def read(handle):
+    """Read motifs in XMS matrix format from a file handle.
+
+    XMS is an XML format for describing regulatory motifs and PSSMs.
+    This format was defined by Thomas Down, and used in the NestedMICA and MotifExplorer programs.
+    """
+    xms_doc = minidom.parse(handle)
+    record = XMSScanner(xms_doc).record
+
+    return record
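And a matching sketch for the XMS scanner above, on a one-column document built inline (the element layout follows the handlers in XMSScanner; the motif is invented):

```python
from io import StringIO
from Bio.motifs import xms

doc = """\
<motifset>
  <motif>
    <name>demo</name>
    <column>
      <weight symbol="adenine">0.7</weight>
      <weight symbol="cytosine">0.1</weight>
      <weight symbol="guanine">0.1</weight>
      <weight symbol="thymine">0.1</weight>
    </column>
  </motif>
</motifset>
"""
record = xms.read(StringIO(doc))
print(record[0].name, record[0].length)  # demo 1
```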
diff --git a/code/lib/Bio/pairwise2.py b/code/lib/Bio/pairwise2.py
new file mode 100644
index 0000000..797ce20
--- /dev/null
+++ b/code/lib/Bio/pairwise2.py
@@ -0,0 +1,1431 @@
+# Copyright 2002 by Jeffrey Chang.
+# Copyright 2016, 2019, 2020 by Markus Piotrowski.
+# All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+
+"""Pairwise sequence alignment using a dynamic programming algorithm.
+
+This provides functions to get global and local alignments between two
+sequences. A global alignment finds the best concordance between all
+characters in two sequences. A local alignment finds just the
+subsequences that align the best. Local alignments must have a positive
+score to be reported and they will not be extended for 'zero counting'
+matches. This means a local alignment will always start and end with
+a positive counting match.
+
+When doing alignments, you can specify the match score and gap
+penalties.  The match score indicates the compatibility between an
+alignment of two characters in the sequences. Highly compatible
+characters should be given positive scores, and incompatible ones
+should be given negative scores or 0.  The gap penalties should be
+negative.
+
+The names of the alignment functions in this module follow the
+convention
+<alignment type>XX
+where <alignment type> is either "global" or "local" and XX is a 2
+character code indicating the parameters it takes.  The first
+character indicates the parameters for matches (and mismatches), and
+the second indicates the parameters for gap penalties.
+
+The match parameters are::
+
+    CODE  DESCRIPTION & OPTIONAL KEYWORDS
+    x     No parameters. Identical characters have score of 1, otherwise 0.
+    m     A match score is the score of identical chars, otherwise mismatch
+          score. Keywords ``match``, ``mismatch``.
+    d     A dictionary returns the score of any pair of characters.
+          Keyword ``match_dict``.
+    c     A callback function returns scores. Keyword ``match_fn``.
+
+The gap penalty parameters are::
+
+    CODE  DESCRIPTION & OPTIONAL KEYWORDS
+    x     No gap penalties.
+    s     Same open and extend gap penalties for both sequences.
+          Keywords ``open``, ``extend``.
+    d     The sequences have different open and extend gap penalties.
+          Keywords ``openA``, ``extendA``, ``openB``, ``extendB``.
+    c     A callback function returns the gap penalties.
+          Keywords ``gap_A_fn``, ``gap_B_fn``.
+
+All the different alignment functions are contained in an object
+``align``. For example:
+
+    >>> from Bio import pairwise2
+    >>> alignments = pairwise2.align.globalxx("ACCGT", "ACG")
+
+For better readability, the required arguments can be used with optional keywords:
+
+    >>> alignments = pairwise2.align.globalxx(sequenceA="ACCGT", sequenceB="ACG")
+
+The result is a list of the alignments between the two strings. Each alignment
+is a named tuple consisting of the two aligned sequences, the score and the
+start and end positions of the alignment:
+
+   >>> print(alignments)
+   [Alignment(seqA='ACCGT', seqB='A-CG-', score=3.0, start=0, end=5), ...
+
+You can access each element of an alignment by index or name:
+
+   >>> alignments[0][2]
+   3.0
+   >>> alignments[0].score
+   3.0
+
+For a nice printout of an alignment, use the ``format_alignment`` method of
+the module:
+
+    >>> from Bio.pairwise2 import format_alignment
+    >>> print(format_alignment(*alignments[0]))
+    ACCGT
+    | || 
+    A-CG-
+      Score=3
+    
+
+All alignment functions have the following arguments:
+
+- Two sequences: strings, Biopython sequence objects or lists.
+  Lists are useful for supplying sequences which contain residues that are
+  encoded by more than one letter.
+
+- ``penalize_extend_when_opening``: boolean (default: False).
+  Whether to count an extension penalty when opening a gap. If false, a gap of
+  1 is only penalized an "open" penalty, otherwise it is penalized
+  "open+extend".
+
+- ``penalize_end_gaps``: boolean.
+  Whether to count the gaps at the ends of an alignment. By default, they are
+  counted for global alignments but not for local ones. Setting
+  ``penalize_end_gaps`` to (boolean, boolean) allows you to specify for the
+  two sequences separately whether gaps at the end of the alignment should be
+  counted.
+
+- ``gap_char``: string (default: ``'-'``).
+  Which character to use as a gap character in the alignment returned. If your
+  input sequences are lists, you must change this to ``['-']``.
+
+- ``force_generic``: boolean (default: False).
+  Always use the generic, non-cached, dynamic programming function (slow!).
+  For debugging.
+
+- ``score_only``: boolean (default: False).
+  Only get the best score, don't recover any alignments. The return value of
+  the function is the score. Faster and uses less memory.
+
+- ``one_alignment_only``: boolean (default: False).
+  Only recover one alignment.
+
+The other parameters of the alignment function depend on the function called.
+Some examples:
+
+- Find the best global alignment between the two sequences. Identical
+  characters are given 1 point. No points are deducted for mismatches or gaps.
+
+    >>> for a in pairwise2.align.globalxx("ACCGT", "ACG"):
+    ...     print(format_alignment(*a))
+    ACCGT
+    | || 
+    A-CG-
+      Score=3
+    
+    ACCGT
+    || | 
+    AC-G-
+      Score=3
+    
+
+- Same thing as before, but with a local alignment. Note that
+  ``format_alignment`` will only show the aligned parts of the sequences,
+  together with the starting positions.
+
+    >>> for a in pairwise2.align.localxx("ACCGT", "ACG"):
+    ...     print(format_alignment(*a))
+    1 ACCG
+      | ||
+    1 A-CG
+      Score=3
+    
+    1 ACCG
+      || |
+    1 AC-G
+      Score=3
+    
+
+  To restore the 'historic' behaviour of ``format_alignment``, i.e., showing
+  also the un-aligned parts of both sequences, use the new keyword parameter
+  ``full_sequences``:
+
+    >>> for a in pairwise2.align.localxx("ACCGT", "ACG"):
+    ...     print(format_alignment(*a, full_sequences=True))
+    ACCGT
+    | || 
+    A-CG-
+      Score=3
+    
+    ACCGT
+    || | 
+    AC-G-
+      Score=3
+    
+
+
+- Do a global alignment. Identical characters are given 2 points, 1 point is
+  deducted for each non-identical character. Don't penalize gaps.
+
+    >>> for a in pairwise2.align.globalmx("ACCGT", "ACG", 2, -1):
+    ...     print(format_alignment(*a))
+    ACCGT
+    | || 
+    A-CG-
+      Score=6
+    
+    ACCGT
+    || | 
+    AC-G-
+      Score=6
+    
+
+- Same as above, except now 0.5 points are deducted when opening a gap, and
+  0.1 points are deducted when extending it.
+
+    >>> for a in pairwise2.align.globalms("ACCGT", "ACG", 2, -1, -.5, -.1):
+    ...     print(format_alignment(*a))
+    ACCGT
+    | || 
+    A-CG-
+      Score=5
+    
+    ACCGT
+    || | 
+    AC-G-
+      Score=5
+    
+
+- Note that you can use keywords to increase the readability, e.g.:
+
+    >>> a = pairwise2.align.globalms("ACGT", "ACG", match=2, mismatch=-1, open=-.5,
+    ...                              extend=-.1)
+
+- Depending on the penalties, a gap in one sequence may be followed by a gap in
+  the other sequence. If you don't like this behaviour, increase the gap-open
+  penalty:
+
+    >>> for a in pairwise2.align.globalms("A", "T", 5, -4, -1, -.1):
+    ...     print(format_alignment(*a))
+    A-
+    
+    -T
+      Score=-2
+    
+    >>> for a in pairwise2.align.globalms("A", "T", 5, -4, -3, -.1):
+    ...	    print(format_alignment(*a))
+    A
+    .
+    T
+      Score=-4
+    
+
+- The alignment function can also use known matrices already included in
+  Biopython (in ``Bio.Align.substitution_matrices``):
+
+    >>> from Bio.Align import substitution_matrices
+    >>> matrix = substitution_matrices.load("BLOSUM62")
+    >>> for a in pairwise2.align.globaldx("KEVLA", "EVL", matrix):
+    ...     print(format_alignment(*a))
+    KEVLA
+     ||| 
+    -EVL-
+      Score=13
+    
+
+- With the parameter ``c`` you can define your own match- and gap functions.
+  E.g. to define an affine logarithmic gap function and using it:
+
+    >>> from math import log
+    >>> def gap_function(x, y):  # x is gap position in seq, y is gap length
+    ...     if y == 0:  # No gap
+    ...         return 0
+    ...     elif y == 1:  # Gap open penalty
+    ...         return -2
+    ...     return - (2 + y/4.0 + log(y)/2.0)
+    ...
+    >>> alignment = pairwise2.align.globalmc("ACCCCCGT", "ACG", 5, -4,
+    ...                                      gap_function, gap_function)
+
+  You can define different gap functions for each sequence.
+  Self-defined match functions must take the two residues to be compared and
+  return a score.
+
+To see a description of the parameters for a function, please look at
+the docstring for the function via the help function, e.g.
+type ``help(pairwise2.align.localds)`` at the Python prompt.
+
+"""  # noqa: W291
+
+import warnings
+from collections import namedtuple
+
+from Bio import BiopythonWarning
+
+
+MAX_ALIGNMENTS = 1000  # maximum alignments recovered in traceback
+
+
+class align:
+    """Provide functions that do alignments.
+
+    Alignment functions are called as:
+
+      pairwise2.align.globalXX
+
+    or
+
+      pairwise2.align.localXX
+
+    Where XX is a 2 character code indicating the match/mismatch parameters
+    (first character, either x, m, d or c) and the gap penalty parameters
+    (second character, either x, s, d, or c).
+
+    For a detailed description read the main module's docstring (e.g.,
+    type ``help(pairwise2)``).
+    To see a description of the parameters for a function, please
+    look at the docstring for the function, e.g. type
+    ``help(pairwise2.align.localds)`` at the Python prompt.
+    """
+
+    class alignment_function:
+        """Callable class which impersonates an alignment function.
+
+        The constructor takes the name of the function.  This class
+        will decode the name of the function to figure out how to
+        interpret the parameters.
+        """
+
+        # match code -> tuple of (parameters, docstring)
+        match2args = {
+            "x": ([], ""),
+            "m": (
+                ["match", "mismatch"],
+                "match is the score to given to identical characters.\n"
+                "mismatch is the score given to non-identical ones.",
+            ),
+            "d": (
+                ["match_dict"],
+                "match_dict is a dictionary where the keys are tuples\n"
+                "of pairs of characters and the values are the scores,\n"
+                "e.g. ('A', 'C') : 2.5.",
+            ),
+            "c": (
+                ["match_fn"],
+                "match_fn is a callback function that takes two "
+                "characters and returns the score between them.",
+            ),
+        }
+        # penalty code -> tuple of (parameters, docstring)
+        penalty2args = {
+            "x": ([], ""),
+            "s": (
+                ["open", "extend"],
+                "open and extend are the gap penalties when a gap is\n"
+                "opened and extended.  They should be negative.",
+            ),
+            "d": (
+                ["openA", "extendA", "openB", "extendB"],
+                "openA and extendA are the gap penalties for sequenceA,\n"
+                "and openB and extendB for sequenceB.  The penalties\n"
+                "should be negative.",
+            ),
+            "c": (
+                ["gap_A_fn", "gap_B_fn"],
+                "gap_A_fn and gap_B_fn are callback functions that takes\n"
+                "(1) the index where the gap is opened, and (2) the length\n"
+                "of the gap.  They should return a gap penalty.",
+            ),
+        }
+
+        def __init__(self, name):
+            """Check to make sure the name of the function is reasonable."""
+            if name.startswith("global"):
+                if len(name) != 8:
+                    raise AttributeError("function should be globalXX")
+            elif name.startswith("local"):
+                if len(name) != 7:
+                    raise AttributeError("function should be localXX")
+            else:
+                raise AttributeError(name)
+            align_type, match_type, penalty_type = name[:-2], name[-2], name[-1]
+            try:
+                match_args, match_doc = self.match2args[match_type]
+            except KeyError:
+                raise AttributeError("unknown match type %r" % match_type)
+            try:
+                penalty_args, penalty_doc = self.penalty2args[penalty_type]
+            except KeyError:
+                raise AttributeError("unknown penalty type %r" % penalty_type)
+
+            # Now get the names of the parameters to this function.
+            param_names = ["sequenceA", "sequenceB"]
+            param_names.extend(match_args)
+            param_names.extend(penalty_args)
+            self.function_name = name
+            self.align_type = align_type
+            self.param_names = param_names
+
+            self.__name__ = self.function_name
+            # Set the doc string.
+            doc = "%s(%s) -> alignments\n" % (
+                self.__name__,
+                ", ".join(self.param_names),
+            )
+            doc += """\
+\nAll of the following parameters can also be passed as
+keywords of the same name.\n\n
+sequenceA and sequenceB must be of the same type, either
+strings, lists or Biopython sequence objects.\n
+"""
+            if match_doc:
+                doc += "\n%s\n" % match_doc
+            if penalty_doc:
+                doc += "\n%s\n" % penalty_doc
+            doc += """\
+\nalignments is a list of named tuples (seqA, seqB, score,
+begin, end). seqA and seqB are strings showing the alignment
+between the sequences.  score is the score of the alignment.
+begin and end are indexes of seqA and seqB that indicate
+where the alignment occurs.
+"""
+            self.__doc__ = doc
+
+        def decode(self, *args, **keywds):
+            """Decode the arguments for the _align function.
+
+            keywds will get passed to it, so translate the arguments
+            to this function into forms appropriate for _align.
+            """
+            keywds = keywds.copy()
+
+            # Replace possible "keywords" with arguments:
+            args += (len(self.param_names) - len(args)) * (None,)
+            for key in keywds.copy():
+                if key in self.param_names:
+                    _index = self.param_names.index(key)
+                    args = args[:_index] + (keywds[key],) + args[_index:]
+                    del keywds[key]
+            args = tuple(arg for arg in args if arg is not None)
+
+            if len(args) != len(self.param_names):
+                raise TypeError(
+                    "%s takes exactly %d argument (%d given)"
+                    % (self.function_name, len(self.param_names), len(args))
+                )
+
+            i = 0
+            while i < len(self.param_names):
+                if self.param_names[i] in [
+                    "sequenceA",
+                    "sequenceB",
+                    "gap_A_fn",
+                    "gap_B_fn",
+                    "match_fn",
+                ]:
+                    keywds[self.param_names[i]] = args[i]
+                    i += 1
+                elif self.param_names[i] == "match":
+                    assert self.param_names[i + 1] == "mismatch"
+                    match, mismatch = args[i], args[i + 1]
+                    keywds["match_fn"] = identity_match(match, mismatch)
+                    i += 2
+                elif self.param_names[i] == "match_dict":
+                    keywds["match_fn"] = dictionary_match(args[i])
+                    i += 1
+                elif self.param_names[i] == "open":
+                    assert self.param_names[i + 1] == "extend"
+                    open, extend = args[i], args[i + 1]
+                    pe = keywds.get("penalize_extend_when_opening", 0)
+                    keywds["gap_A_fn"] = affine_penalty(open, extend, pe)
+                    keywds["gap_B_fn"] = affine_penalty(open, extend, pe)
+                    i += 2
+                elif self.param_names[i] == "openA":
+                    assert self.param_names[i + 3] == "extendB"
+                    openA, extendA, openB, extendB = args[i : i + 4]
+                    pe = keywds.get("penalize_extend_when_opening", 0)
+                    keywds["gap_A_fn"] = affine_penalty(openA, extendA, pe)
+                    keywds["gap_B_fn"] = affine_penalty(openB, extendB, pe)
+                    i += 4
+                else:
+                    raise ValueError("unknown parameter %r" % self.param_names[i])
+
+            # Here are the default parameters for _align.  Assign
+            # these to keywds, unless already specified.
+            pe = keywds.get("penalize_extend_when_opening", 0)
+            default_params = [
+                ("match_fn", identity_match(1, 0)),
+                ("gap_A_fn", affine_penalty(0, 0, pe)),
+                ("gap_B_fn", affine_penalty(0, 0, pe)),
+                ("penalize_extend_when_opening", 0),
+                ("penalize_end_gaps", self.align_type == "global"),
+                ("align_globally", self.align_type == "global"),
+                ("gap_char", "-"),
+                ("force_generic", 0),
+                ("score_only", 0),
+                ("one_alignment_only", 0),
+            ]
+            for name, default in default_params:
+                keywds[name] = keywds.get(name, default)
+            value = keywds["penalize_end_gaps"]
+            try:
+                n = len(value)
+            except TypeError:
+                keywds["penalize_end_gaps"] = tuple([value] * 2)
+            else:
+                assert n == 2
+            return keywds
+
+        def __call__(self, *args, **keywds):
+            """Call the alignment instance already created."""
+            keywds = self.decode(*args, **keywds)
+            return _align(**keywds)
+
+    def __getattr__(self, attr):
+        """Call alignment_function() to check and decode the attributes."""
+        # The following 'magic' is needed to rewrite the class docstring
+        # dynamically:
+        wrapper = self.alignment_function(attr)
+        wrapper_type = type(wrapper)
+        wrapper_dict = wrapper_type.__dict__.copy()
+        wrapper_dict["__doc__"] = wrapper.__doc__
+        new_alignment_function = type("alignment_function", (object,), wrapper_dict)
+
+        return new_alignment_function(attr)
+
+
+align = align()
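+
+# Example usage (an illustrative sketch):
+#
+#     alignments = align.globalxx("ACCGT", "ACG")
+#     best = alignments[0]   # named tuple: (seqA, seqB, score, start, end)
+#     print(best.score)      # 3 identical characters with globalxx -> score 3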
+
+
+def _align(
+    sequenceA,
+    sequenceB,
+    match_fn,
+    gap_A_fn,
+    gap_B_fn,
+    penalize_extend_when_opening,
+    penalize_end_gaps,
+    align_globally,
+    gap_char,
+    force_generic,
+    score_only,
+    one_alignment_only,
+):
+    """Return optimal alignments between two sequences (PRIVATE).
+
+    This method either returns a list of optimal alignments (with the same
+    score) or just the optimal score.
+    """
+    if not sequenceA or not sequenceB:
+        return []
+    try:
+        sequenceA + gap_char
+        sequenceB + gap_char
+    except TypeError:
+        raise TypeError(
+            "both sequences must be of the same type, either "
+            "string/sequence object or list. Gap character must "
+            "fit the sequence type (string or list)"
+        )
+
+    if not isinstance(sequenceA, list):
+        sequenceA = str(sequenceA)
+    if not isinstance(sequenceB, list):
+        sequenceB = str(sequenceB)
+    if not align_globally and (penalize_end_gaps[0] or penalize_end_gaps[1]):
+        warnings.warn(
+            '"penalize_end_gaps" should not be used in local '
+            "alignments. The resulting score may be wrong.",
+            BiopythonWarning,
+        )
+
+    if (
+        (not force_generic)
+        and isinstance(gap_A_fn, affine_penalty)
+        and isinstance(gap_B_fn, affine_penalty)
+    ):
+        open_A, extend_A = gap_A_fn.open, gap_A_fn.extend
+        open_B, extend_B = gap_B_fn.open, gap_B_fn.extend
+        matrices = _make_score_matrix_fast(
+            sequenceA,
+            sequenceB,
+            match_fn,
+            open_A,
+            extend_A,
+            open_B,
+            extend_B,
+            penalize_extend_when_opening,
+            penalize_end_gaps,
+            align_globally,
+            score_only,
+        )
+    else:
+        matrices = _make_score_matrix_generic(
+            sequenceA,
+            sequenceB,
+            match_fn,
+            gap_A_fn,
+            gap_B_fn,
+            penalize_end_gaps,
+            align_globally,
+            score_only,
+        )
+
+    score_matrix, trace_matrix, best_score = matrices
+
+    # print("SCORE %s" % print_matrix(score_matrix))
+    # print("TRACEBACK %s" % print_matrix(trace_matrix))
+
+    # If they only want the score, then return it.
+    if score_only:
+        return best_score
+
+    starts = _find_start(score_matrix, best_score, align_globally)
+
+    # Recover the alignments and return them.
+    alignments = _recover_alignments(
+        sequenceA,
+        sequenceB,
+        starts,
+        best_score,
+        score_matrix,
+        trace_matrix,
+        align_globally,
+        gap_char,
+        one_alignment_only,
+        gap_A_fn,
+        gap_B_fn,
+    )
+    if not alignments:
+        # This may happen, see recover_alignments for explanation
+        score_matrix, trace_matrix = _reverse_matrices(score_matrix, trace_matrix)
+        starts = [(z, (y, x)) for z, (x, y) in starts]
+        alignments = _recover_alignments(
+            sequenceB,
+            sequenceA,
+            starts,
+            best_score,
+            score_matrix,
+            trace_matrix,
+            align_globally,
+            gap_char,
+            one_alignment_only,
+            gap_B_fn,
+            gap_A_fn,
+            reverse=True,
+        )
+    return alignments
+
+
+def _make_score_matrix_generic(
+    sequenceA,
+    sequenceB,
+    match_fn,
+    gap_A_fn,
+    gap_B_fn,
+    penalize_end_gaps,
+    align_globally,
+    score_only,
+):
+    """Generate a score and traceback matrix (PRIVATE).
+
+    This implementation according to Needleman-Wunsch allows the usage of
+    general gap functions and is rather slow. It is automatically called if
+    you define your own gap functions. You can force the usage of this method
+    with ``force_generic=True``.
+    """
+    local_max_score = 0
+    # Create the score and traceback matrices. These should be in the
+    # shape:
+    # sequenceA (down) x sequenceB (across)
+    lenA, lenB = len(sequenceA), len(sequenceB)
+    score_matrix, trace_matrix = [], []
+    for i in range(lenA + 1):
+        score_matrix.append([None] * (lenB + 1))
+        if not score_only:
+            trace_matrix.append([None] * (lenB + 1))
+
+    # Initialize first row and column with gap scores. This is like opening up
+    # i gaps at the beginning of sequence A or B.
+    for i in range(lenA + 1):
+        if penalize_end_gaps[1]:  # [1]:gap in sequence B
+            score = gap_B_fn(0, i)
+        else:
+            score = 0.0
+        score_matrix[i][0] = score
+    for i in range(lenB + 1):
+        if penalize_end_gaps[0]:  # [0]:gap in sequence A
+            score = gap_A_fn(0, i)
+        else:
+            score = 0.0
+        score_matrix[0][i] = score
+
+    # Fill in the score matrix.  Each position in the matrix
+    # represents an alignment between a character from sequence A and
+    # one in sequence B.  As we iterate through the matrix, we find the
+    # alignment by choosing the best of:
+    #    1) extending a previous alignment without gaps
+    #    2) adding a gap in sequenceA
+    #    3) adding a gap in sequenceB
+    for row in range(1, lenA + 1):
+        for col in range(1, lenB + 1):
+            # First, calculate the score that would occur by extending
+            # the alignment without gaps.
+            # fmt: off
+            nogap_score = (
+                score_matrix[row - 1][col - 1]
+                + match_fn(sequenceA[row - 1], sequenceB[col - 1])
+            )
+
+            # fmt: on
+            # Try to find a better score by opening gaps in sequenceA.
+            # Do this by checking alignments from each column in the row.
+            # Each column represents a different character to align from,
+            # and thus a different length gap.
+            # Although the gap function does not distinguish between opening
+            # and extending a gap, we distinguish them for the backtrace.
+            if not penalize_end_gaps[0] and row == lenA:
+                row_open = score_matrix[row][col - 1]
+                row_extend = max(score_matrix[row][x] for x in range(col))
+            else:
+                row_open = score_matrix[row][col - 1] + gap_A_fn(row, 1)
+                row_extend = max(
+                    score_matrix[row][x] + gap_A_fn(row, col - x) for x in range(col)
+                )
+
+            # Try to find a better score by opening gaps in sequenceB.
+            if not penalize_end_gaps[1] and col == lenB:
+                col_open = score_matrix[row - 1][col]
+                col_extend = max(score_matrix[x][col] for x in range(row))
+            else:
+                col_open = score_matrix[row - 1][col] + gap_B_fn(col, 1)
+                col_extend = max(
+                    score_matrix[x][col] + gap_B_fn(col, row - x) for x in range(row)
+                )
+
+            best_score = max(nogap_score, row_open, row_extend, col_open, col_extend)
+            local_max_score = max(local_max_score, best_score)
+            if not align_globally and best_score < 0:
+                score_matrix[row][col] = 0.0
+            else:
+                score_matrix[row][col] = best_score
+
+            # The backtrace is encoded in binary. See _make_score_matrix_fast
+            # for details.
+            if not score_only:
+                trace_score = 0
+                if rint(nogap_score) == rint(best_score):
+                    trace_score += 2
+                if rint(row_open) == rint(best_score):
+                    trace_score += 1
+                if rint(row_extend) == rint(best_score):
+                    trace_score += 8
+                if rint(col_open) == rint(best_score):
+                    trace_score += 4
+                if rint(col_extend) == rint(best_score):
+                    trace_score += 16
+                trace_matrix[row][col] = trace_score
+
+    if not align_globally:
+        best_score = local_max_score
+
+    return score_matrix, trace_matrix, best_score
+
+
+def _make_score_matrix_fast(
+    sequenceA,
+    sequenceB,
+    match_fn,
+    open_A,
+    extend_A,
+    open_B,
+    extend_B,
+    penalize_extend_when_opening,
+    penalize_end_gaps,
+    align_globally,
+    score_only,
+):
+    """Generate a score and traceback matrix according to Gotoh (PRIVATE).
+
+    This is an implementation of the Needleman-Wunsch dynamic programming
+    algorithm as modified by Gotoh, implementing affine gap penalties.
+    In short, we have three matrices, holding scores for alignments ending
+    in (1) a match/mismatch, (2) a gap in sequence A, and (3) a gap in
+    sequence B, respectively. However, we can combine them in one matrix,
+    which holds the best scores, and store only those values from the
+    other matrices that are actually used for the next step of calculation.
+    The traceback matrix holds the positions for backtracing the alignment.
+    """
+    first_A_gap = calc_affine_penalty(1, open_A, extend_A, penalize_extend_when_opening)
+    first_B_gap = calc_affine_penalty(1, open_B, extend_B, penalize_extend_when_opening)
+    local_max_score = 0
+
+    # Create the score and traceback matrices. These should be in the
+    # shape:
+    # sequenceA (down) x sequenceB (across)
+    lenA, lenB = len(sequenceA), len(sequenceB)
+    score_matrix, trace_matrix = [], []
+    for i in range(lenA + 1):
+        score_matrix.append([None] * (lenB + 1))
+        if not score_only:
+            trace_matrix.append([None] * (lenB + 1))
+
+    # Initialize first row and column with gap scores. This is like opening up
+    # i gaps at the beginning of sequence A or B.
+    for i in range(lenA + 1):
+        if penalize_end_gaps[1]:  # [1]:gap in sequence B
+            score = calc_affine_penalty(
+                i, open_B, extend_B, penalize_extend_when_opening
+            )
+        else:
+            score = 0
+        score_matrix[i][0] = score
+    for i in range(lenB + 1):
+        if penalize_end_gaps[0]:  # [0]:gap in sequence A
+            score = calc_affine_penalty(
+                i, open_A, extend_A, penalize_extend_when_opening
+            )
+        else:
+            score = 0
+        score_matrix[0][i] = score
+
+    # Now initialize the col 'matrix'. Actually this is only a one dimensional
+    # list, since we only need the col scores from the last row.
+    col_score = [0]  # Best score, if actual alignment ends with gap in seqB
+    for i in range(1, lenB + 1):
+        col_score.append(
+            calc_affine_penalty(i, 2 * open_B, extend_B, penalize_extend_when_opening)
+        )
+
+    # The row 'matrix' is calculated on the fly. Here we only need the actual
+    # score.
+    # Now, filling up the score and traceback matrices:
+    for row in range(1, lenA + 1):
+        row_score = calc_affine_penalty(
+            row, 2 * open_A, extend_A, penalize_extend_when_opening
+        )
+        for col in range(1, lenB + 1):
+            # Calculate the score that would occur by extending the
+            # alignment without gaps.
+            # fmt: off
+            nogap_score = (
+                score_matrix[row - 1][col - 1]
+                + match_fn(sequenceA[row - 1], sequenceB[col - 1])
+            )
+            # fmt: on
+            # Check the score that would occur if there were a gap in
+            # sequence A. This could come from opening a new gap or
+            # extending an existing one.
+            # A gap in sequence A can also be opened if it follows a gap in
+            # sequence B:  A-
+            #              -B
+            if not penalize_end_gaps[0] and row == lenA:
+                row_open = score_matrix[row][col - 1]
+                row_extend = row_score
+            else:
+                row_open = score_matrix[row][col - 1] + first_A_gap
+                row_extend = row_score + extend_A
+            row_score = max(row_open, row_extend)
+
+            # The same for sequence B:
+            if not penalize_end_gaps[1] and col == lenB:
+                col_open = score_matrix[row - 1][col]
+                col_extend = col_score[col]
+            else:
+                col_open = score_matrix[row - 1][col] + first_B_gap
+                col_extend = col_score[col] + extend_B
+            col_score[col] = max(col_open, col_extend)
+
+            best_score = max(nogap_score, col_score[col], row_score)
+            local_max_score = max(local_max_score, best_score)
+            if not align_globally and best_score < 0:
+                score_matrix[row][col] = 0
+            else:
+                score_matrix[row][col] = best_score
+
+            # Now the trace_matrix. The edges of the backtrace are encoded
+            # binary: 1 = open gap in seqA, 2 = match/mismatch of seqA and
+            # seqB, 4 = open gap in seqB, 8 = extend gap in seqA, and
+            # 16 = extend gap in seqB. These values can be summed up.
+            # Thus, the trace score 7 means that the best score can either
+            # come from opening a gap in seqA (=1), pairing two characters
+            # of seqA and seqB (+2=3) or opening a gap in seqB (+4=7).
+            # However, if we only want the score we don't care about the trace.
+            if not score_only:
+                row_score_rint = rint(row_score)
+                col_score_rint = rint(col_score[col])
+                row_trace_score = 0
+                col_trace_score = 0
+                if rint(row_open) == row_score_rint:
+                    row_trace_score += 1  # Open gap in seqA
+                if rint(row_extend) == row_score_rint:
+                    row_trace_score += 8  # Extend gap in seqA
+                if rint(col_open) == col_score_rint:
+                    col_trace_score += 4  # Open gap in seqB
+                if rint(col_extend) == col_score_rint:
+                    col_trace_score += 16  # Extend gap in seqB
+
+                trace_score = 0
+                best_score_rint = rint(best_score)
+                if rint(nogap_score) == best_score_rint:
+                    trace_score += 2  # Align seqA with seqB
+                if row_score_rint == best_score_rint:
+                    trace_score += row_trace_score
+                if col_score_rint == best_score_rint:
+                    trace_score += col_trace_score
+                trace_matrix[row][col] = trace_score
+
+    if not align_globally:
+        best_score = local_max_score
+
+    return score_matrix, trace_matrix, best_score
+
+
+def _recover_alignments(
+    sequenceA,
+    sequenceB,
+    starts,
+    best_score,
+    score_matrix,
+    trace_matrix,
+    align_globally,
+    gap_char,
+    one_alignment_only,
+    gap_A_fn,
+    gap_B_fn,
+    reverse=False,
+):
+    """Do the backtracing and return a list of alignments (PRIVATE).
+
+    Recover the alignments by following the traceback matrix.  This
+    is a recursive procedure, but it's implemented here iteratively
+    with a stack.
+
+    sequenceA and sequenceB may be sequences, including strings,
+    lists, or list-like objects.  In order to preserve the type of
+    the object, we need to use slices on the sequences instead of
+    indexes.  For example, sequenceA[row] may return a type that's
+    not compatible with sequenceA, e.g. if sequenceA is a list and
+    sequenceA[row] is a string.  Thus, avoid using indexes and use
+    slices, e.g. sequenceA[row:row+1].  Assume that client-defined
+    sequence classes preserve these semantics.
+    """
+    lenA, lenB = len(sequenceA), len(sequenceB)
+    ali_seqA, ali_seqB = sequenceA[0:0], sequenceB[0:0]
+    tracebacks = []
+    in_process = []
+
+    for start in starts:
+        score, (row, col) = start
+        begin = 0
+        if align_globally:
+            end = None
+        else:
+            # If this start is a zero-extension: don't start here!
+            if (score, (row - 1, col - 1)) in starts:
+                continue
+            # Local alignments should start with a positive score!
+            if score <= 0:
+                continue
+            # Local alignments should not end with a gap!:
+            trace = trace_matrix[row][col]
+            if (trace - trace % 2) % 4 == 2:  # Trace contains 'nogap', fine!
+                trace_matrix[row][col] = 2
+            # If not, don't start here!
+            else:
+                continue
+            end = -max(lenA - row, lenB - col)
+            if not end:
+                end = None
+            col_distance = lenB - col
+            row_distance = lenA - row
+
+            # fmt: off
+            ali_seqA = (
+                (col_distance - row_distance) * gap_char
+                + sequenceA[lenA - 1 : row - 1 : -1]
+            )
+            ali_seqB = (
+                (row_distance - col_distance) * gap_char
+                + sequenceB[lenB - 1 : col - 1 : -1]
+            )
+            # fmt: on
+        in_process += [
+            (ali_seqA, ali_seqB, end, row, col, False, trace_matrix[row][col])
+        ]
+    while in_process and len(tracebacks) < MAX_ALIGNMENTS:
+        # Although we allow a gap in seqB to be followed by a gap in seqA,
+        # we don't want to allow it the other way round, since this would
+        # give redundant alignments of type: A-  vs.  -A
+        #                                    -B       B-
+        # Thus we need to keep track if a gap in seqA was opened (col_gap)
+        # and stop the backtrace (dead_end) if a gap in seqB follows.
+        #
+        # Attention: This may fail, if the gap-penalties for both strands are
+        # different. In this case the second alignment may be the only optimal
+        # alignment. Thus it can happen that no alignment is returned. For
+        # this case a workaround was implemented, which reverses the input and
+        # the matrices (this happens in _reverse_matrices) and repeats the
+        # backtrace. The variable 'reverse' keeps track of this.
+        dead_end = False
+        ali_seqA, ali_seqB, end, row, col, col_gap, trace = in_process.pop()
+
+        while (row > 0 or col > 0) and not dead_end:
+            cache = (ali_seqA[:], ali_seqB[:], end, row, col, col_gap)
+
+            # If trace is empty we have reached at least one border of the
+            # matrix or the end of a local alignment. Just add the rest of
+            # the sequence(s) and fill with gaps if necessary.
+            if not trace:
+                if col and col_gap:
+                    dead_end = True
+                else:
+                    ali_seqA, ali_seqB = _finish_backtrace(
+                        sequenceA, sequenceB, ali_seqA, ali_seqB, row, col, gap_char
+                    )
+                break
+            elif trace % 2 == 1:  # = row open = open gap in seqA
+                trace -= 1
+                if col_gap:
+                    dead_end = True
+                else:
+                    col -= 1
+                    ali_seqA += gap_char
+                    ali_seqB += sequenceB[col : col + 1]
+                    col_gap = False
+            elif trace % 4 == 2:  # = match/mismatch of seqA with seqB
+                trace -= 2
+                row -= 1
+                col -= 1
+                ali_seqA += sequenceA[row : row + 1]
+                ali_seqB += sequenceB[col : col + 1]
+                col_gap = False
+            elif trace % 8 == 4:  # = col open = open gap in seqB
+                trace -= 4
+                row -= 1
+                ali_seqA += sequenceA[row : row + 1]
+                ali_seqB += gap_char
+                col_gap = True
+            elif trace in (8, 24):  # = row extend = extend gap in seqA
+                trace -= 8
+                if col_gap:
+                    dead_end = True
+                else:
+                    col_gap = False
+                    # We need to find the starting point of the extended gap
+                    x = _find_gap_open(
+                        sequenceA,
+                        sequenceB,
+                        ali_seqA,
+                        ali_seqB,
+                        end,
+                        row,
+                        col,
+                        col_gap,
+                        gap_char,
+                        score_matrix,
+                        trace_matrix,
+                        in_process,
+                        gap_A_fn,
+                        col,
+                        row,
+                        "col",
+                        best_score,
+                        align_globally,
+                    )
+                    ali_seqA, ali_seqB, row, col, in_process, dead_end = x
+            elif trace == 16:  # = col extend = extend gap in seqB
+                trace -= 16
+                col_gap = True
+                x = _find_gap_open(
+                    sequenceA,
+                    sequenceB,
+                    ali_seqA,
+                    ali_seqB,
+                    end,
+                    row,
+                    col,
+                    col_gap,
+                    gap_char,
+                    score_matrix,
+                    trace_matrix,
+                    in_process,
+                    gap_B_fn,
+                    row,
+                    col,
+                    "row",
+                    best_score,
+                    align_globally,
+                )
+                ali_seqA, ali_seqB, row, col, in_process, dead_end = x
+
+            if trace:  # There is another path to follow...
+                cache += (trace,)
+                in_process.append(cache)
+            trace = trace_matrix[row][col]
+            if not align_globally:
+                if score_matrix[row][col] == best_score:
+                    # We have gone through a 'zero-score' extension, discard it
+                    dead_end = True
+                elif score_matrix[row][col] <= 0:
+                    # We have reached the end of the backtrace
+                    begin = max(row, col)
+                    trace = 0
+        if not dead_end:
+            if not reverse:
+                tracebacks.append((ali_seqA[::-1], ali_seqB[::-1], score, begin, end))
+            else:
+                tracebacks.append((ali_seqB[::-1], ali_seqA[::-1], score, begin, end))
+            if one_alignment_only:
+                break
+    return _clean_alignments(tracebacks)
+
+
+def _find_start(score_matrix, best_score, align_globally):
+    """Return a list of starting points (score, (row, col)) (PRIVATE).
+
+    Indicating every possible place to start the tracebacks.
+    """
+    nrows, ncols = len(score_matrix), len(score_matrix[0])
+    # In this implementation of the global algorithm, the start will always be
+    # the bottom right corner of the matrix.
+    if align_globally:
+        starts = [(best_score, (nrows - 1, ncols - 1))]
+    else:
+        # For local alignments, there may be many different start points.
+        starts = []
+        tolerance = 0  # XXX do anything with this?
+        # Now find all the positions within some tolerance of the best
+        # score.
+        for row in range(nrows):
+            for col in range(ncols):
+                score = score_matrix[row][col]
+                if rint(abs(score - best_score)) <= rint(tolerance):
+                    starts.append((score, (row, col)))
+    return starts
+
+
+def _reverse_matrices(score_matrix, trace_matrix):
+    """Reverse score and trace matrices (PRIVATE)."""
+    reverse_score_matrix = []
+    reverse_trace_matrix = []
+    # fmt: off
+    reverse_trace = {
+        1: 4, 2: 2, 3: 6, 4: 1, 5: 5, 6: 3, 7: 7, 8: 16, 9: 20, 10: 18, 11: 22, 12: 17,
+        13: 21, 14: 19, 15: 23, 16: 8, 17: 12, 18: 10, 19: 14, 20: 9, 21: 13, 22: 11,
+        23: 15, 24: 24, 25: 28, 26: 26, 27: 30, 28: 25, 29: 29, 30: 27, 31: 31,
+        None: None,
+    }
+    # fmt: on
+    for col in range(len(score_matrix[0])):
+        new_score_row = []
+        new_trace_row = []
+        for row in range(len(score_matrix)):
+            new_score_row.append(score_matrix[row][col])
+            new_trace_row.append(reverse_trace[trace_matrix[row][col]])
+        reverse_score_matrix.append(new_score_row)
+        reverse_trace_matrix.append(new_trace_row)
+    return reverse_score_matrix, reverse_trace_matrix
+
+
+def _clean_alignments(alignments):
+    """Take a list of alignments and return a cleaned version (PRIVATE).
+
+    Remove duplicates, make sure begin and end are set correctly, remove
+    empty alignments.
+    """
+    Alignment = namedtuple("Alignment", ("seqA, seqB, score, start, end"))
+    unique_alignments = []
+    for align in alignments:
+        if align not in unique_alignments:
+            unique_alignments.append(align)
+    i = 0
+    while i < len(unique_alignments):
+        seqA, seqB, score, begin, end = unique_alignments[i]
+        # Make sure end is set reasonably.
+        if end is None:  # global alignment
+            end = len(seqA)
+        elif end < 0:
+            end = end + len(seqA)
+        # If there's no alignment here, get rid of it.
+        if begin >= end:
+            del unique_alignments[i]
+            continue
+        unique_alignments[i] = Alignment(seqA, seqB, score, begin, end)
+        i += 1
+    return unique_alignments
+
+
+def _finish_backtrace(sequenceA, sequenceB, ali_seqA, ali_seqB, row, col, gap_char):
+    """Add remaining sequences and fill with gaps if necessary (PRIVATE)."""
+    if row:
+        ali_seqA += sequenceA[row - 1 :: -1]
+    if col:
+        ali_seqB += sequenceB[col - 1 :: -1]
+    if row > col:
+        ali_seqB += gap_char * (len(ali_seqA) - len(ali_seqB))
+    elif col > row:
+        ali_seqA += gap_char * (len(ali_seqB) - len(ali_seqA))
+    return ali_seqA, ali_seqB
+
+
+def _find_gap_open(
+    sequenceA,
+    sequenceB,
+    ali_seqA,
+    ali_seqB,
+    end,
+    row,
+    col,
+    col_gap,
+    gap_char,
+    score_matrix,
+    trace_matrix,
+    in_process,
+    gap_fn,
+    target,
+    index,
+    direction,
+    best_score,
+    align_globally,
+):
+    """Find the starting point(s) of the extended gap (PRIVATE)."""
+    dead_end = False
+    target_score = score_matrix[row][col]
+    for n in range(target):
+        if direction == "col":
+            col -= 1
+            ali_seqA += gap_char
+            ali_seqB += sequenceB[col : col + 1]
+        else:
+            row -= 1
+            ali_seqA += sequenceA[row : row + 1]
+            ali_seqB += gap_char
+        actual_score = score_matrix[row][col] + gap_fn(index, n + 1)
+        if not align_globally and score_matrix[row][col] == best_score:
+            # We have run through a 'zero-score' extension; discard it
+            dead_end = True
+            break
+        if rint(actual_score) == rint(target_score) and n > 0:
+            if not trace_matrix[row][col]:
+                break
+            else:
+                in_process.append(
+                    (
+                        ali_seqA[:],
+                        ali_seqB[:],
+                        end,
+                        row,
+                        col,
+                        col_gap,
+                        trace_matrix[row][col],
+                    )
+                )
+        if not trace_matrix[row][col]:
+            dead_end = True
+    return ali_seqA, ali_seqB, row, col, in_process, dead_end
+
+
+_PRECISION = 1000
+
+
+def rint(x, precision=_PRECISION):
+    """Print number with declared precision."""
+    return int(x * precision + 0.5)
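+
+# For example (an illustrative sketch): rint(3.0000004) == rint(3.0) == 3000,
+# so two floats that round to the same 1/_PRECISION step compare as equal
+# in the traceback.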
+
+
+class identity_match:
+    """Create a match function for use in an alignment.
+
+    match and mismatch are the scores to give when two residues are equal
+    or unequal.  By default, match is 1 and mismatch is 0.
+    """
+
+    def __init__(self, match=1, mismatch=0):
+        """Initialize the class."""
+        self.match = match
+        self.mismatch = mismatch
+
+    def __call__(self, charA, charB):
+        """Call a match function instance already created."""
+        if charA == charB:
+            return self.match
+        return self.mismatch
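+
+# Example usage (an illustrative sketch):
+#
+#     match_fn = identity_match(2, -1)
+#     match_fn("A", "A")  # -> 2
+#     match_fn("A", "C")  # -> -1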
+
+
+class dictionary_match:
+    """Create a match function for use in an alignment.
+
+    Attributes:
+     - score_dict     - A dictionary where the keys are tuples (residue 1,
+       residue 2) and the values are the match scores between those residues.
+     - symmetric      - A flag that indicates whether the scores are symmetric.
+
+    """
+
+    def __init__(self, score_dict, symmetric=1):
+        """Initialize the class."""
+        self.score_dict = score_dict
+        self.symmetric = symmetric
+
+    def __call__(self, charA, charB):
+        """Call a dictionary match instance already created."""
+        if self.symmetric and (charA, charB) not in self.score_dict:
+            # If the score dictionary is symmetric, then look up the
+            # score both ways.
+            charB, charA = charA, charB
+        return self.score_dict[(charA, charB)]
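+
+# Example usage (an illustrative sketch):
+#
+#     match_fn = dictionary_match({("A", "C"): 2.5}, symmetric=1)
+#     match_fn("A", "C")  # -> 2.5
+#     match_fn("C", "A")  # -> 2.5, looked up as ("A", "C")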
+
+
+class affine_penalty:
+    """Create a gap function for use in an alignment."""
+
+    def __init__(self, open, extend, penalize_extend_when_opening=0):
+        """Initialize the class."""
+        if open > 0 or extend > 0:
+            raise ValueError("Gap penalties should be non-positive.")
+        if not penalize_extend_when_opening and (extend < open):
+            raise ValueError(
+                "Gap opening penalty should be higher than or equal to "
+                "the gap extension penalty (in absolute value)"
+            )
+        self.open, self.extend = open, extend
+        self.penalize_extend_when_opening = penalize_extend_when_opening
+
+    def __call__(self, index, length):
+        """Call a gap function instance already created."""
+        return calc_affine_penalty(
+            length, self.open, self.extend, self.penalize_extend_when_opening
+        )
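+
+# Example usage (an illustrative sketch):
+#
+#     gap_fn = affine_penalty(-2, -0.5)
+#     gap_fn(0, 1)  # -> -2.0 (open a gap of length 1)
+#     gap_fn(0, 3)  # -> -3.0 (open once, extend twice)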
+
+
+def calc_affine_penalty(length, open, extend, penalize_extend_when_opening):
+    """Calculate a penality score for the gap function."""
+    if length <= 0:
+        return 0.0
+    penalty = open + extend * length
+    if not penalize_extend_when_opening:
+        penalty -= extend
+    return penalty
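+
+# For example (an illustrative sketch): calc_affine_penalty(3, -2, -0.5, 1)
+# is -3.5, while with penalize_extend_when_opening=0 the first extension is
+# not charged and the same gap costs -3.0.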
+
+
+def print_matrix(matrix):
+    """Print out a matrix for debugging purposes."""
+    # Transpose the matrix and get the length of the values in each column.
+    matrixT = [[] for x in range(len(matrix[0]))]
+    for i in range(len(matrix)):
+        for j in range(len(matrix[i])):
+            matrixT[j].append(len(str(matrix[i][j])))
+    ndigits = [max(x) for x in matrixT]
+    for i in range(len(matrix)):
+        # Use a string formatting trick to add leading spaces.
+        print(
+            " ".join("%*s " % (ndigits[j], matrix[i][j]) for j in range(len(matrix[i])))
+        )
+
+
+def format_alignment(align1, align2, score, begin, end, full_sequences=False):
+    """Format the alignment prettily into a string.
+
+    IMPORTANT: Gap symbol must be "-" (or ['-'] for lists)!
+
+    Since Biopython 1.71 identical matches are shown with a pipe
+    character, mismatches as a dot, and gaps as a space.
+
+    Prior releases just used the pipe character to indicate the
+    aligned region (matches, mismatches and gaps).
+
+    Also, in local alignments, if the alignment does not include
+    the whole sequences, now only the aligned part is shown,
+    together with the start positions of the aligned subsequences.
+    The start positions are 1-based; so start position n is the
+    n-th base/amino acid in the *un-aligned* sequence.
+
+    NOTE: This is different to the alignment's begin/end values,
+    which give the Python indices (0-based) of the bases/amino acids
+    in the *aligned* sequences.
+
+    If you want to restore the 'historic' behaviour, that means
+    displaying the whole sequences (including the non-aligned parts),
+    use ``full_sequences=True``. In this case, the non-aligned leading
+    and trailing parts are also indicated by spaces in the match-line.
+    """
+    align_begin = begin
+    align_end = end
+    start1 = start2 = ""
+    start_m = begin  # Begin of match line (how many spaces to include)
+    # For local alignments:
+    if not full_sequences and (begin != 0 or end != len(align1)):
+        # Calculate the actual start positions in the un-aligned sequences
+        # This will only work if the gap symbol is '-' or ['-']!
+        start1 = str(len(align1[:begin]) - align1[:begin].count("-") + 1) + " "
+        start2 = str(len(align2[:begin]) - align2[:begin].count("-") + 1) + " "
+        start_m = max(len(start1), len(start2))
+    elif full_sequences:
+        start_m = 0
+        begin = 0
+        end = len(align1)
+
+    if isinstance(align1, list):
+        # List elements will be separated by spaces, since they can be
+        # of different lengths
+        align1 = [a + " " for a in align1]
+        align2 = [a + " " for a in align2]
+
+    s1_line = ["{:>{width}}".format(start1, width=start_m)]  # seq1 line
+    m_line = [" " * start_m]  # match line
+    s2_line = ["{:>{width}}".format(start2, width=start_m)]  # seq2 line
+
+    for n, (a, b) in enumerate(zip(align1[begin:end], align2[begin:end])):
+        # Since list elements can be of different length, we center them,
+        # using the maximum length of the two compared elements as width
+        m_len = max(len(a), len(b))
+        s1_line.append("{:^{width}}".format(a, width=m_len))
+        s2_line.append("{:^{width}}".format(b, width=m_len))
+        if full_sequences and (n < align_begin or n >= align_end):
+            m_line.append("{:^{width}}".format(" ", width=m_len))  # space
+            continue
+        if a == b:
+            m_line.append("{:^{width}}".format("|", width=m_len))  # match
+        elif a.strip() == "-" or b.strip() == "-":
+            m_line.append("{:^{width}}".format(" ", width=m_len))  # gap
+        else:
+            m_line.append("{:^{width}}".format(".", width=m_len))  # mismatch
+
+    s2_line.append("\n  Score=%g\n" % score)
+    return "\n".join(["".join(s1_line), "".join(m_line), "".join(s2_line)])
+
+
+# Try to load C implementations of the functions. If that fails,
+# issue a warning and use the pure Python implementations.
+# The redefinition is deliberate, hence the no-quality-assurance (noqa)
+# flag for flake8.
+# First, we keep a reference to the pure Python functions (for testing purposes):
+
+_python_make_score_matrix_fast = _make_score_matrix_fast
+_python_rint = rint
+
+try:
+    from .cpairwise2 import rint, _make_score_matrix_fast  # noqa
+except ImportError:
+    warnings.warn(
+        "Import of C module failed. Falling back to pure Python "
+        "implementation. This may be slooow...",
+        BiopythonWarning,
+    )
+
+if __name__ == "__main__":
+    from Bio._utils import run_doctest
+
+    run_doctest()
diff --git a/code/lib/Bio/phenotype/__init__.py b/code/lib/Bio/phenotype/__init__.py
new file mode 100644
index 0000000..636b843
--- /dev/null
+++ b/code/lib/Bio/phenotype/__init__.py
@@ -0,0 +1,241 @@
+# Copyright 2014-2016 by Marco Galardini.  All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+r"""phenotype data input/output.
+
+Input
+=====
+The main function is Bio.phenotype.parse(...) which takes an input file,
+and format string.  This returns an iterator giving PlateRecord objects:
+
+    >>> from Bio import phenotype
+    >>> for record in phenotype.parse("phenotype/Plates.csv", "pm-csv"):
+    ...     print("%s %i" % (record.id, len(record)))
+    ...
+    PM01 96
+    PM09 96
+
+Note that the parse() function will invoke the relevant parser for the
+format with its default settings.  You may want more control, in which case
+you need to create a format-specific iterator directly.
+
+Input - Single Records
+======================
+If you expect your file to contain one-and-only-one record, then we provide
+the following 'helper' function which will return a single PlateRecord, or
+raise an exception if there are no records or more than one record:
+
+    >>> from Bio import phenotype
+    >>> record = phenotype.read("phenotype/Plate.json", "pm-json")
+    >>> print("%s %i" % (record.id, len(record)))
+    PM01 96
+
+This style is useful when you expect a single record only (and would
+consider multiple records an error).  For example, when dealing with PM
+JSON files saved by the opm library.
+
+However, if you just want the first record from a file containing multiple
+records, use the next() function on the iterator:
+
+    >>> from Bio import phenotype
+    >>> record = next(phenotype.parse("phenotype/Plates.csv", "pm-csv"))
+    >>> print("%s %i" % (record.id, len(record)))
+    PM01 96
+
+The above code will work as long as the file contains at least one record.
+Note that if there is more than one record, the remaining records will be
+silently ignored.
+
+Output
+======
+Use the function Bio.phenotype.write(...), which takes a complete set of
+PlateRecord objects (either as a list, or an iterator), an output file handle
+(or in recent versions of Biopython an output filename as a string) and of
+course the file format::
+
+        from Bio import phenotype
+        records = ...
+        phenotype.write(records, "example.json", "pm-json")
+
+Or, using a handle::
+
+        from Bio import phenotype
+        records = ...
+        with open("example.json", "w") as handle:
+            phenotype.write(records, handle, "pm-json")
+
+You are expected to call this function once (with all your records) and if
+using a handle, make sure you close it to flush the data to the hard disk.
+
+
+File Formats
+============
+When specifying the file format, use lowercase strings.
+
+ - pm-json - Phenotype Microarray plates in JSON format.
+ - pm-csv  - Phenotype Microarray plates in CSV format, which is the
+             machine vendor format
+
+Note that while Bio.phenotype can read the above file formats, it can only
+write in JSON format.
+"""
+
+from Bio import BiopythonExperimentalWarning
+from Bio.File import as_handle
+from . import phen_micro
+
+import warnings
+
+
+warnings.warn(
+    "Bio.phenotype is an experimental submodule which may undergo "
+    "significant changes prior to its future official release.",
+    BiopythonExperimentalWarning,
+)
+
+# Convention for format names is "mainname-format" in lower case.
+
+_FormatToIterator = {
+    "pm-csv": phen_micro.CsvIterator,
+    "pm-json": phen_micro.JsonIterator,
+}
+
+_FormatToWriter = {"pm-json": phen_micro.JsonWriter}
+
+
+def write(plates, handle, format):
+    """Write complete set of PlateRecords to a file.
+
+     - plates    - A list (or iterator) of PlateRecord objects.
+     - handle    - File handle object to write to, or filename as string
+                   (note older versions of Biopython only took a handle).
+     - format    - lower case string describing the file format to write.
+
+    You should close the handle after calling this function.
+
+    Returns the number of records written (as an integer).
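+
+    For example (a minimal sketch; "example.json" is a placeholder and
+    ``records`` is some iterable of PlateRecord objects)::
+
+        from Bio import phenotype
+        count = phenotype.write(records, "example.json", "pm-json")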
+    """
+    # Try and give helpful error messages:
+    if not isinstance(format, str):
+        raise TypeError("Need a string for the file format (lower case)")
+    if not format:
+        raise ValueError("Format required (lower case string)")
+    if format != format.lower():
+        raise ValueError("Format string '%s' should be lower case" % format)
+
+    if isinstance(plates, phen_micro.PlateRecord):
+        plates = [plates]
+
+    with as_handle(handle, "w") as fp:
+        # Map the file format to a writer class
+        if format in _FormatToWriter:
+            writer_class = _FormatToWriter[format]
+            count = writer_class(plates).write(fp)
+        else:
+            raise ValueError("Unknown format '%s'" % format)
+
+        if not isinstance(count, int):
+            raise TypeError(
+                "Internal error - the underlying %s "
+                "writer should have returned the record count, not %r" % (format, count)
+            )
+
+    return count
+
+
+def parse(handle, format):
+    """Turn a phenotype file into an iterator returning PlateRecords.
+
+     - handle   - handle to the file, or the filename as a string
+                  (note older versions of Biopython only took a handle).
+     - format   - lower case string describing the file format.
+
+    Typical usage, opening a file to read in, and looping over the record(s):
+
+    >>> from Bio import phenotype
+    >>> filename = "phenotype/Plates.csv"
+    >>> for record in phenotype.parse(filename, "pm-csv"):
+    ...    print("ID %s" % record.id)
+    ...    print("Number of wells %i" % len(record))
+    ...
+    ID PM01
+    Number of wells 96
+    ID PM09
+    Number of wells 96
+
+    Use the Bio.phenotype.read(...) function when you expect a single record
+    only.
+    """
+    # Try and give helpful error messages:
+    if not isinstance(format, str):
+        raise TypeError("Need a string for the file format (lower case)")
+    if not format:
+        raise ValueError("Format required (lower case string)")
+    if format != format.lower():
+        raise ValueError("Format string '%s' should be lower case" % format)
+
+    with as_handle(handle) as fp:
+        # Map the file format to a sequence iterator:
+        if format in _FormatToIterator:
+            iterator_generator = _FormatToIterator[format]
+            i = iterator_generator(fp)
+        else:
+            raise ValueError("Unknown format '%s'" % format)
+        yield from i
+
+
+def read(handle, format):
+    """Turn a phenotype file into a single PlateRecord.
+
+     - handle   - handle to the file, or the filename as a string
+                  (note older versions of Biopython only took a handle).
+     - format   - string describing the file format.
+
+    This function is for use parsing phenotype files containing
+    exactly one record.  For example, reading a PM JSON file:
+
+    >>> from Bio import phenotype
+    >>> record = phenotype.read("phenotype/Plate.json", "pm-json")
+    >>> print("ID %s" % record.id)
+    ID PM01
+    >>> print("Number of wells %i" % len(record))
+    Number of wells 96
+
+    If the handle contains no records, or more than one record,
+    an exception is raised.  For example::
+
+        from Bio import phenotype
+        record = phenotype.read("plates.csv", "pm-csv")
+        Traceback (most recent call last):
+        ...
+        ValueError: More than one record found in handle
+
+    If, however, you want the first record from a file containing
+    multiple records, this function would raise an exception (as
+    shown in the example above).  Instead use:
+
+    >>> from Bio import phenotype
+    >>> record = next(phenotype.parse("phenotype/Plates.csv", "pm-csv"))
+    >>> print("First record's ID %s" % record.id)
+    First record's ID PM01
+
+    Use the Bio.phenotype.parse(handle, format) function if you want
+    to read multiple records from the handle.
+    """
+    iterator = parse(handle, format)
+    try:
+        first = next(iterator)
+    except StopIteration:
+        first = None
+    if first is None:
+        raise ValueError("No records found in handle")
+    try:
+        second = next(iterator)
+    except StopIteration:
+        second = None
+    if second is not None:
+        raise ValueError("More than one record found in handle")
+    return first
diff --git a/code/lib/Bio/phenotype/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/phenotype/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000..e7c934c
Binary files /dev/null and b/code/lib/Bio/phenotype/__pycache__/__init__.cpython-37.pyc differ
diff --git a/code/lib/Bio/phenotype/__pycache__/phen_micro.cpython-37.pyc b/code/lib/Bio/phenotype/__pycache__/phen_micro.cpython-37.pyc
new file mode 100644
index 0000000..56401ff
Binary files /dev/null and b/code/lib/Bio/phenotype/__pycache__/phen_micro.cpython-37.pyc differ
diff --git a/code/lib/Bio/phenotype/__pycache__/pm_fitting.cpython-37.pyc b/code/lib/Bio/phenotype/__pycache__/pm_fitting.cpython-37.pyc
new file mode 100644
index 0000000..cca8c12
Binary files /dev/null and b/code/lib/Bio/phenotype/__pycache__/pm_fitting.cpython-37.pyc differ
diff --git a/code/lib/Bio/phenotype/phen_micro.py b/code/lib/Bio/phenotype/phen_micro.py
new file mode 100644
index 0000000..93af24f
--- /dev/null
+++ b/code/lib/Bio/phenotype/phen_micro.py
@@ -0,0 +1,1207 @@
+# Copyright 2014-2016 by Marco Galardini.  All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Classes to work with Phenotype Microarray data.
+
+More information on the single plates can be found here: http://www.biolog.com/
+
+Classes:
+ - PlateRecord - Object that contain time course data on each well of the
+   plate, as well as metadata (if any).
+ - WellRecord - Object that contains the time course data of a single well
+ - JsonWriter - Writer of PlateRecord objects in JSON format.
+
+Functions:
+ - JsonIterator -  Incremental PM JSON parser, this is an iterator that returns
+   PlateRecord objects.
+ - CsvIterator - Incremental PM CSV parser, this is an iterator that returns
+   PlateRecord objects.
+ - _toOPM - Used internally by JsonWriter, converts PlateRecord objects in
+   dictionaries ready to be serialized in JSON format.
+
+"""
+
+import warnings
+import json
+import csv
+import numpy as np
+
+from Bio import BiopythonParserWarning
+
+# Private csv headers - hardcoded because these supposedly never change
+_datafile = "Data File"
+_plate = "Plate Type"
+_strainType = "Strain Type"
+_sample = "Sample Number"
+_strainName = "Strain Name"
+_strainNumber = "Strain Number"
+_other = "Other"
+_hour = "Hour"
+_file = "File"
+_position = "Position"
+_setupTime = "Setup Time"
+
+_platesPrefix = "PM"
+_platesPrefixMammalian = "PM-M"
+#
+
+# Json identifiers - hardcoded as they are set by the creators of opm
+_csvData = "csv_data"
+_measurements = "measurements"
+#
+
+
+class PlateRecord:
+    """PlateRecord object for storing Phenotype Microarray plates data.
+
+    A PlateRecord stores all the wells of a particular phenotype
+    Microarray plate, along with metadata (if any). The single wells can be
+    accessed calling their id as an index or iterating on the PlateRecord:
+
+    >>> from Bio import phenotype
+    >>> plate = phenotype.read("phenotype/Plate.json", "pm-json")
+    >>> well = plate['A05']
+    >>> for well in plate:
+    ...    print(well.id)
+    ...
+    A01
+    ...
+
+    The plate rows and columns can be queried with an indexing system similar
+    to NumPy and other matrices:
+
+    >>> print(plate[1])
+    Plate ID: PM01
+    Well: 12
+    Rows: 1
+    Columns: 12
+    PlateRecord('WellRecord['B01'], WellRecord['B02'], WellRecord['B03'], ..., WellRecord['B12']')
+
+    >>> print(plate[:,1])
+    Plate ID: PM01
+    Well: 8
+    Rows: 8
+    Columns: 1
+    PlateRecord('WellRecord['A02'], WellRecord['B02'], WellRecord['C02'], ..., WellRecord['H02']')
+
+    Single WellRecord objects can be accessed using this indexing system:
+
+    >>> print(plate[1,2])
+    Plate ID: PM01
+    Well ID: B03
+    Time points: 384
+    Minimum signal 0.00 at time 11.00
+    Maximum signal 76.25 at time 18.00
+    WellRecord('(0.0, 11.0), (0.25, 11.0), (0.5, 11.0), (0.75, 11.0), (1.0, 11.0), ..., (95.75, 11.0)')
+
+    The presence of a particular well can be inspected with the "in" keyword:
+
+    >>> 'A01' in plate
+    True
+
+    All the wells belonging to a "row" (identified by the first character of
+    the well id) in the plate can be obtained:
+
+    >>> for well in plate.get_row('H'):
+    ...     print(well.id)
+    ...
+    H01
+    H02
+    H03
+    ...
+
+    All the wells belonging to a "column" (identified by the number of the well)
+    in the plate can be obtained:
+
+    >>> for well in plate.get_column(12):
+    ...     print(well.id)
+    ...
+    A12
+    B12
+    C12
+    ...
+
+    Two PlateRecord objects can be compared: if all their wells are equal the
+    two plates are considered equal:
+
+    >>> plate2 = phenotype.read("phenotype/Plate.json", "pm-json")
+    >>> plate == plate2
+    True
+
+    Two PlateRecord objects can be summed up or subtracted from each other:
+    the signals of each well will be summed up or subtracted. The id of the
+    left operand will be kept:
+
+    >>> plate3 = plate + plate2
+    >>> print(plate3.id)
+    PM01
+
+    Many Phenotype Microarray plates have a "negative control" well, which can
+    be subtracted from all wells:
+
+    >>> subplate = plate.subtract_control()
+
+    """
+
+    def __init__(self, plateid, wells=None):
+        """Initialize the class."""
+        self.id = plateid
+
+        if wells is None:
+            wells = []
+
+        # Similar behaviour as GenBank
+        # Contains all the attributes
+        self.qualifiers = {}
+
+        # Well_id --> WellRecord objects
+        self._wells = {}
+        try:
+            for w in wells:
+                self._is_well(w)
+                self[w.id] = w
+        except TypeError:
+            raise TypeError(
+                "You must provide an iterator-like object containing the single wells"
+            )
+
+        self._update()
+
+    def _update(self):
+        """Update the rows and columns string identifiers (PRIVATE)."""
+        self._rows = sorted({x[0] for x in self._wells})
+        self._columns = sorted({x[1:] for x in self._wells})
+
+    def _is_well(self, obj):
+        """Check if the given object is a WellRecord object (PRIVATE).
+
+        Used both for the class constructor and the __setitem__ method
+        """
+        # Value should be of WellRecord type
+        if not isinstance(obj, WellRecord):
+            raise ValueError(
+                "A WellRecord type object is needed as value (got %s)" % type(obj)
+            )
+
+    def __getitem__(self, index):
+        """Access part of the plate.
+
+        Depending on the indices, you can get a WellRecord object
+        (representing a single well of the plate),
+        or another plate
+        (representing some part or all of the original plate).
+
+        plate[wid] gives a WellRecord (if wid is a WellRecord id)
+        plate[r,c] gives a WellRecord
+        plate[r] gives a row as a PlateRecord
+        plate[r,:] gives a row as a PlateRecord
+        plate[:,c] gives a column as a PlateRecord
+
+        plate[:] and plate[:,:] give a copy of the plate
+
+        Anything else gives a subset of the original plate, e.g.
+        plate[0:2] or plate[0:2,:] uses only row 0 and 1
+        plate[:,1:3] uses only columns 1 and 2
+        plate[0:2,1:3] uses only rows 0 & 1 and only cols 1 & 2
+
+        >>> from Bio import phenotype
+        >>> plate = phenotype.read("phenotype/Plate.json", "pm-json")
+
+        You can access a well of the plate, using its id.
+
+        >>> w = plate['A01']
+
+        You can access a row of the plate as a PlateRecord using an integer
+        index:
+
+        >>> first_row = plate[0]
+        >>> print(first_row)
+        Plate ID: PM01
+        Well: 12
+        Rows: 1
+        Columns: 12
+        PlateRecord('WellRecord['A01'], WellRecord['A02'], WellRecord['A03'], ..., WellRecord['A12']')
+        >>> last_row = plate[-1]
+        >>> print(last_row)
+        Plate ID: PM01
+        Well: 12
+        Rows: 1
+        Columns: 12
+        PlateRecord('WellRecord['H01'], WellRecord['H02'], WellRecord['H03'], ..., WellRecord['H12']')
+
+        You can also use Python's slice notation to get sub-plates
+        containing only some of the plate rows:
+
+        >>> sub_plate = plate[2:5]
+        >>> print(sub_plate)
+        Plate ID: PM01
+        Well: 36
+        Rows: 3
+        Columns: 12
+        PlateRecord('WellRecord['C01'], WellRecord['C02'], WellRecord['C03'], ..., WellRecord['E12']')
+
+        This includes support for a step, i.e. plate[start:end:step], which
+        can be used to select every second row:
+
+        >>> sub_plate = plate[::2]
+
+        You can also use two indices to specify both rows and columns.
+        Using simple integers gives you the single wells. e.g.
+
+        >>> w = plate[3, 4]
+        >>> print(w.id)
+        D05
+
+        To get a single column use this syntax:
+
+        >>> sub_plate = plate[:, 4]
+        >>> print(sub_plate)
+        Plate ID: PM01
+        Well: 8
+        Rows: 8
+        Columns: 1
+        PlateRecord('WellRecord['A05'], WellRecord['B05'], WellRecord['C05'], ..., WellRecord['H05']')
+
+        Or, to get part of a column,
+
+        >>> sub_plate = plate[1:3, 4]
+        >>> print(sub_plate)
+        Plate ID: PM01
+        Well: 2
+        Rows: 2
+        Columns: 1
+        PlateRecord(WellRecord['B05'], WellRecord['C05'])
+
+        However, in general you get a sub-plate,
+
+        >>> print(plate[1:5, 3:6])
+        Plate ID: PM01
+        Well: 12
+        Rows: 4
+        Columns: 3
+        PlateRecord('WellRecord['B04'], WellRecord['B05'], WellRecord['B06'], ..., WellRecord['E06']')
+
+        This should all seem familiar to anyone who has used the NumPy
+        array or matrix objects.
+        """
+        # Well identifier access
+        if isinstance(index, str):
+            try:
+                return self._wells[index]
+            except KeyError:
+                raise KeyError("Well %s not found!" % index)
+
+        # Integer index
+        elif isinstance(index, int):
+            try:
+                row = self._rows[index]
+            except IndexError:
+                raise IndexError("Row %d not found!" % index)
+            return PlateRecord(
+                self.id, filter(lambda x: x.id.startswith(row), self._wells.values())
+            )
+
+        # Slice
+        elif isinstance(index, slice):
+            rows = self._rows[index]
+            return PlateRecord(
+                self.id, filter(lambda x: x.id[0] in rows, self._wells.values())
+            )
+
+        # Other access
+        elif len(index) != 2:
+            raise TypeError("Invalid index type.")
+
+        row_index, col_index = index
+        if isinstance(row_index, int) and isinstance(col_index, int):
+            # Return a single WellRecord
+            try:
+                row = self._rows[row_index]
+            except IndexError:
+                raise IndexError("Row %d not found!" % row_index)
+            try:
+                col = self._columns[col_index]
+            except IndexError:
+                raise IndexError("Column %d not found!" % col_index)
+
+            return self._wells[row + col]
+
+        elif isinstance(row_index, int):
+            try:
+                row = self._rows[row_index]
+            except IndexError:
+                raise IndexError("Row %d not found!" % row_index)
+            cols = self._columns[col_index]
+
+            return PlateRecord(
+                self.id,
+                filter(
+                    lambda x: x.id.startswith(row) and x.id[1:] in cols,
+                    self._wells.values(),
+                ),
+            )
+
+        elif isinstance(col_index, int):
+            try:
+                col = self._columns[col_index]
+            except IndexError:
+                raise IndexError("Columns %d not found!" % col_index)
+            rows = self._rows[row_index]
+
+            return PlateRecord(
+                self.id,
+                filter(
+                    lambda x: x.id.endswith(col) and x.id[0] in rows,
+                    self._wells.values(),
+                ),
+            )
+
+        else:
+            rows = self._rows[row_index]
+            cols = self._columns[col_index]
+
+            return PlateRecord(
+                self.id,
+                filter(
+                    lambda x: x.id[0] in rows and x.id[1:] in cols, self._wells.values()
+                ),
+            )
+
+    def __setitem__(self, key, value):
+        if not isinstance(key, str):
+            raise ValueError("Well identifier should be string-like")
+        self._is_well(value)
+        # Provided key and well ID should be the same
+        if value.id != key:
+            raise ValueError(
+                "WellRecord ID and provided key are different (got '%s' and '%s')"
+                % (value.id, key)
+            )
+        self._wells[key] = value
+
+        self._update()
+
+    def __delitem__(self, key):
+        if not isinstance(key, str):
+            raise ValueError("Well identifier should be string-like")
+        del self._wells[key]
+
+        self._update()
+
+    def __iter__(self):
+        for well in sorted(self._wells):
+            yield self._wells[well]
+
+    def __contains__(self, wellid):
+        if wellid in self._wells:
+            return True
+        return False
+
+    def __len__(self):
+        """Return the number of wells in this plate."""
+        return len(self._wells)
+
+    def __eq__(self, other):
+        if isinstance(other, self.__class__):
+            return self._wells == other._wells
+        else:
+            return False
+
+    def __add__(self, plate):
+        """Add another PlateRecord object.
+
+        The wells in both plates must be the same
+
+        A new PlateRecord object is returned, having the same id as the
+        left operand.
+        """
+        if not isinstance(plate, PlateRecord):
+            raise TypeError("Expecting a PlateRecord object")
+
+        if {x.id for x in self} != {x.id for x in plate}:
+            raise ValueError("The two plates have different wells")
+
+        wells = []
+
+        for w in self:
+            wells.append(w + plate[w.id])
+
+        newp = PlateRecord(self.id, wells=wells)
+
+        return newp
+
+    def __sub__(self, plate):
+        """Subtract another PlateRecord object.
+
+        The wells in both plates must be the same
+
+        A new PlateRecord object is returned, having the same id as the
+        left operand.
+        """
+        if not isinstance(plate, PlateRecord):
+            raise TypeError("Expecting a PlateRecord object")
+
+        if {x.id for x in self} != {x.id for x in plate}:
+            raise ValueError("The two plates have different wells")
+
+        wells = []
+
+        for w in self:
+            wells.append(w - plate[w.id])
+
+        newp = PlateRecord(self.id, wells=wells)
+
+        return newp
+
+    def get_row(self, row):
+        """Get all the wells of a given row.
+
+        A row is identified with a letter (e.g. 'A')
+        """
+        # Key is cast to str implicitly
+        try:
+            row = str(row)
+        except Exception:
+            # Is it even possible to get an exception here?
+            raise ValueError("Row identifier should be string-like")
+        if len(row) > 1:
+            raise ValueError("Row identifier must be of maximum one letter")
+
+        for w in sorted(filter(lambda x: x.startswith(row), self._wells)):
+            yield self._wells[w]
+
+    def get_column(self, column):
+        """Get all the wells of a given column.
+
+        A column is identified with a number (e.g. '6')
+        """
+        # Column is cast to int implicitly
+        try:
+            column = int(column)
+        except Exception:
+            raise ValueError("Column identifier should be a number")
+
+        # A 96-well plate has well numbers in two digits
+        for w in sorted(filter(lambda x: x.endswith("%02d" % column), self._wells)):
+            yield self._wells[w]
+
+    def subtract_control(self, control="A01", wells=None):
+        """Subtract a 'control' well from the other plates wells.
+
+        By default the control is subtracted to all wells, unless
+        a list of well ID is provided
+
+        The control well should belong to the plate
+        A new PlateRecord object is returned
+        """
+        if control not in self:
+            raise ValueError("Control well not present in plate")
+        wcontrol = self[control]
+
+        if wells is None:
+            wells = self._wells.keys()
+
+        missing = {w for w in wells if w not in self}
+        if missing:
+            raise ValueError("Some wells to be subtracted are not present")
+
+        nwells = []
+
+        for w in self:
+            if w.id in wells:
+                nwells.append(w - wcontrol)
+            else:
+                nwells.append(w)
+
+        newp = PlateRecord(self.id, wells=nwells)
+
+        return newp
+
+    def __repr__(self):
+        """Return a (truncated) representation of the plate for debugging."""
+        if len(self._wells) > 4:
+            # Show the last well and the first three
+            return "%s('%s, ..., %s')" % (
+                self.__class__.__name__,
+                ", ".join(
+                    [
+                        "%s['%s']" % (self[x].__class__.__name__, self[x].id)
+                        for x in sorted(self._wells.keys())[:3]
+                    ]
+                ),
+                "%s['%s']"
+                % (
+                    self[sorted(self._wells.keys())[-1]].__class__.__name__,
+                    self[sorted(self._wells.keys())[-1]].id,
+                ),
+            )
+        else:
+            return "%s(%s)" % (
+                self.__class__.__name__,
+                ", ".join(
+                    [
+                        "%s['%s']" % (self[x].__class__.__name__, self[x].id)
+                        for x in sorted(self._wells.keys())
+                    ]
+                ),
+            )
+
+    def __str__(self):
+        """Return a human readable summary of the record (string).
+
+        The Python built-in function str works by calling the object's __str__
+        method, e.g.
+
+        >>> from Bio import phenotype
+        >>> record = next(phenotype.parse("phenotype/Plates.csv", "pm-csv"))
+        >>> print(record)
+        Plate ID: PM01
+        Well: 96
+        Rows: 8
+        Columns: 12
+        PlateRecord('WellRecord['A01'], WellRecord['A02'], WellRecord['A03'], ..., WellRecord['H12']')
+
+        Note that long well lists are shown truncated.
+        """
+        lines = []
+        if self.id:
+            lines.append("Plate ID: %s" % self.id)
+        lines.append("Well: %i" % len(self))
+        # Here we assume that all well IDs start with a char
+        lines.append("Rows: %d" % len({x.id[0] for x in self}))
+        # Here we assume that the well number is a two-digit number
+        lines.append("Columns: %d" % len({x.id[1:3] for x in self}))
+        lines.append(repr(self))
+        return "\n".join(lines)
+
+
+class WellRecord:
+    """WellRecord stores all time course signals of a phenotype Microarray well.
+
+    The single time points and signals can be accessed iterating on the
+    WellRecord or using lists indexes or slices:
+
+    >>> from Bio import phenotype
+    >>> plate = phenotype.read("phenotype/Plate.json", "pm-json")
+    >>> well = plate['A05']
+    >>> for time, signal in well:
+    ...    print("Time: %f, Signal: %f" % (time, signal)) # doctest:+ELLIPSIS
+    ...
+    Time: 0.000000, Signal: 14.000000
+    Time: 0.250000, Signal: 13.000000
+    Time: 0.500000, Signal: 15.000000
+    Time: 0.750000, Signal: 15.000000
+    ...
+    >>> well[1]
+    16.0
+    >>> well[1:5]
+    [16.0, 20.0, 18.0, 15.0]
+    >>> well[1:5:0.5]
+    [16.0, 19.0, 20.0, 18.0, 18.0, 18.0, 15.0, 18.0]
+
+    If a time point is not present in the input file but lies between the
+    minimum and maximum time points, the interpolated signal is returned;
+    otherwise a NaN value is returned:
+
+    >>> well[1.3]
+    19.0
+    >>> well[1250]
+    nan
+
+    Two WellRecord objects can be compared: if their input time/signal pairs
+    are exactly the same, the two records are considered equal:
+
+    >>> well2 = plate['H12']
+    >>> well == well2
+    False
+
+    Two WellRecord objects can be summed up or subtracted from each other: a new
+    WellRecord object is returned, having the left operand's id.
+
+    >>> well1 = plate['A05']
+    >>> well2 = well + well1
+    >>> print(well2.id)
+    A05
+
+    If SciPy is installed, a sigmoid function can be fitted to the PM curve,
+    in order to extract some parameters; three sigmoid functions are available:
+    * gompertz
+    * logistic
+    * richards
+    The functions are described in Zwietering et al., 1990 (PMID: 16348228)
+
+    For example::
+
+        well.fit()
+        print(well.slope, well.model)
+        (61.853516785566917, 'logistic')
+
+    If no sigmoid function is specified, the first one that is successfully
+    fitted is used. The user can also request a specific function.
+
+    To specify gompertz::
+
+        well.fit('gompertz')
+        print(well.slope, well.model)
+        (127.94630059171354, 'gompertz')
+
+    If no function can be fitted, the parameters are left as None, except for
+    the max, min, average_height and area.
+    """
+
+    def __init__(self, wellid, plate=None, signals=None):
+        """Initialize the class."""
+        if plate is None:
+            self.plate = PlateRecord(None)
+        else:
+            self.plate = plate
+
+        self.id = wellid
+
+        # Curve parameters (to be calculated with the "fit" function)
+        # Parameters that don't need scipy
+        self.max = None
+        self.min = None
+        self.average_height = None
+
+        # Parameters that need scipy
+        self.area = None
+        self.plateau = None
+        self.slope = None
+        self.lag = None
+        self.v = None
+        self.y0 = None
+        self.model = None
+
+        # Original signals (private)
+        if signals is None:
+            self._signals = {}
+        else:
+            self._signals = signals
+
+    def _interpolate(self, time):
+        """Linear interpolation of the signals at certain time points (PRIVATE)."""
+        times = sorted(self._signals.keys())
+
+        return np.interp(
+            time, times, [self._signals[x] for x in times], left=np.nan, right=np.nan
+        )
+
+    def __setitem__(self, time, signal):
+        """Assign a signal at a certain time point."""
+        try:
+            time = float(time)
+        except ValueError:
+            raise ValueError("Time point should be a number")
+        try:
+            signal = float(signal)
+        except ValueError:
+            raise ValueError("Signal should be a number")
+
+        self._signals[time] = signal
+
+    def __getitem__(self, time):
+        """Return a subset of signals or a single signal."""
+        if isinstance(time, slice):
+            # Fix the missing values in the slice
+            if time.start is None:
+                start = 0
+            else:
+                start = time.start
+
+            if time.stop is None:
+                stop = max(self.get_times())
+            else:
+                stop = time.stop
+
+            time = np.arange(start, stop, time.step)
+            return list(self._interpolate(time))
+
+        elif isinstance(time, int) or isinstance(time, float):
+            return self._interpolate(time)
+
+        raise ValueError("Invalid index")
+
+    def __iter__(self):
+        for time in sorted(self._signals.keys()):
+            yield time, self._signals[time]
+
+    def __eq__(self, other):
+        if isinstance(other, self.__class__):
+            if list(self._signals.keys()) != list(other._signals.keys()):
+                return False
+            # Account for the presence of NaNs
+            for k in self._signals:
+                if np.isnan(self[k]) and np.isnan(other[k]):
+                    continue
+                elif self[k] != other[k]:
+                    return False
+            return True
+        else:
+            return False
+
+    def __add__(self, well):
+        """Add another WellRecord object.
+
+        A new WellRecord object is returned, having the same id as the
+        left operand
+        """
+        if not isinstance(well, WellRecord):
+            raise TypeError("Expecting a WellRecord object")
+
+        signals = {}
+
+        times = set(self._signals.keys()).union(set(well._signals.keys()))
+        for t in sorted(times):
+            signals[t] = self[t] + well[t]
+
+        neww = WellRecord(self.id, signals=signals)
+
+        return neww
+
+    def __sub__(self, well):
+        """Subtract another WellRecord object.
+
+        A new WellRecord object is returned, having the same id as the
+        left operand
+        """
+        if not isinstance(well, WellRecord):
+            raise TypeError("Expecting a WellRecord object")
+
+        signals = {}
+
+        times = set(self._signals.keys()).union(set(well._signals.keys()))
+        for t in sorted(times):
+            signals[t] = self[t] - well[t]
+
+        neww = WellRecord(self.id, signals=signals)
+
+        return neww
+
+    def __len__(self):
+        """Return the number of time points sampled."""
+        return len(self._signals)
+
+    def __repr__(self):
+        """Return a (truncated) representation of the signals for debugging."""
+        if len(self) > 7:
+            # Shows the last time point and the first five
+            return "%s('%s, ..., %s')" % (
+                self.__class__.__name__,
+                ", ".join([str(x) for x in self.get_raw()[:5]]),
+                str(self.get_raw()[-1]),
+            )
+        else:
+            return "%s(%s)" % (
+                self.__class__.__name__,
+                ", ".join([str(x) for x in self.get_raw()]),
+            )
+
+    def __str__(self):
+        """Return a human readable summary of the record (string).
+
+        The Python built-in function str works by calling the object's __str__
+        method, e.g.
+
+        >>> from Bio import phenotype
+        >>> plate = phenotype.read("phenotype/Plate.json", "pm-json")
+        >>> record = plate['A05']
+        >>> print(record)
+        Plate ID: PM01
+        Well ID: A05
+        Time points: 384
+        Minimum signal 0.25 at time 13.00
+        Maximum signal 19.50 at time 23.00
+        WellRecord('(0.0, 14.0), (0.25, 13.0), (0.5, 15.0), (0.75, 15.0), (1.0, 16.0), ..., (95.75, 16.0)')
+
+        Note that long time spans are shown truncated.
+        """
+        lines = []
+        if self.plate and self.plate.id:
+            lines.append("Plate ID: %s" % self.plate.id)
+        if self.id:
+            lines.append("Well ID: %s" % self.id)
+        lines.append("Time points: %i" % len(self))
+        lines.append("Minum signal %.2f at time %.2f" % min(self, key=lambda x: x[1]))
+        lines.append("Maximum signal %.2f at time %.2f" % max(self, key=lambda x: x[1]))
+        lines.append(repr(self))
+        return "\n".join(lines)
+
+    def get_raw(self):
+        """Get a list of time/signal pairs."""
+        return [(t, self._signals[t]) for t in sorted(self._signals.keys())]
+
+    def get_times(self):
+        """Get a list of the recorded time points."""
+        return sorted(self._signals.keys())
+
+    def get_signals(self):
+        """Get a list of the recorded signals (ordered by collection time)."""
+        return [self._signals[t] for t in sorted(self._signals.keys())]
+
+    def fit(self, function=("gompertz", "logistic", "richards")):
+        """Fit a sigmoid function to this well and extract curve parameters.
+
+        If function is None or an empty tuple/list, then no fitting is done.
+        Only the object's ``.min``, ``.max`` and ``.average_height`` are
+        calculated.
+
+        By default the following fitting functions will be used in order:
+         - gompertz
+         - logistic
+         - richards
+
+        The first function that is successfully fitted to the signals will
+        be used to extract the curve parameters and update ``.area`` and
+        ``.model``. If no function can be fitted an exception is raised.
+
+        The function argument should be a tuple or list of any of these three
+        function names as strings.
+
+        There is no return value.
+        """
+        avail_func = ("gompertz", "logistic", "richards")
+
+        # Parameters not dependent on curve fitting
+        self.max = max(self, key=lambda x: x[1])[1]
+        self.min = min(self, key=lambda x: x[1])[1]
+
+        self.average_height = np.array(self.get_signals()).mean()
+
+        if not function:
+            self.area = None
+            self.model = None
+            return
+        for sigmoid_func in function:
+            if sigmoid_func not in avail_func:
+                raise ValueError("Fitting function %r not supported" % sigmoid_func)
+
+        # Parameters that depend on scipy curve_fit
+        from .pm_fitting import fit, get_area
+        from .pm_fitting import logistic, gompertz, richards
+
+        function_map = {
+            "logistic": logistic,
+            "gompertz": gompertz,
+            "richards": richards,
+        }
+
+        self.area = get_area(self.get_signals(), self.get_times())
+
+        self.model = None
+        for sigmoid_func in function:
+            func = function_map[sigmoid_func]
+            try:
+                (self.plateau, self.slope, self.lag, self.v, self.y0), pcov = fit(
+                    func, self.get_times(), self.get_signals()
+                )
+
+                self.model = sigmoid_func
+                return
+            except RuntimeError:
+                continue
+        raise RuntimeError("Could not fit any sigmoid function")
+
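+# A minimal usage sketch (hedged; assumes SciPy is installed and that a
+# "phenotype/Plate.json" file like the one used in the doctests is available):
+#
+#     from Bio import phenotype
+#     plate = phenotype.read("phenotype/Plate.json", "pm-json")
+#     well = plate["A05"]
+#     well.fit(("logistic",))        # try only the logistic model
+#     print(well.model, well.slope)  # e.g. 'logistic' and the fitted slope
+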
+
+def JsonIterator(handle):
+    """Iterate over PM json records as PlateRecord objects.
+
+    Arguments:
+     - handle - input file
+
+    """
+    try:
+        data = json.load(handle)
+    except ValueError:
+        raise ValueError("Could not parse JSON file")
+
+    # We can have one single plate or several
+    # we need to discriminate
+    if hasattr(data, "keys"):
+        data = [data]
+
+    for pobj in data:
+        try:
+            plateID = pobj[_csvData][_plate]
+        except TypeError:
+            raise TypeError("Malformed JSON input")
+        except KeyError:
+            raise KeyError("Could not retrieve plate id")
+
+        # Parse also non-standard plate IDs
+        if not plateID.startswith(_platesPrefix) and not plateID.startswith(
+            _platesPrefixMammalian
+        ):
+            warnings.warn(
+                "Non-standard plate ID found (%s)" % plateID, BiopythonParserWarning
+            )
+        else:
+            # Simplify the plate IDs, removing letters, as opm does
+            if plateID.startswith(_platesPrefixMammalian):
+                pID = plateID[len(_platesPrefixMammalian) :]
+            else:
+                pID = plateID[len(_platesPrefix) :]
+            while len(pID) > 0:
+                try:
+                    int(pID)
+                    break
+                except ValueError:
+                    pID = pID[:-1]
+
+            # No luck
+            if len(pID) == 0:
+                warnings.warn(
+                    "Non-standard plate ID found (%s)" % plateID, BiopythonParserWarning
+                )
+            elif int(pID) < 0:
+                warnings.warn(
+                    "Non-standard plate ID found (%s), using %s"
+                    % (plateID, _platesPrefix + "%02d" % abs(int(pID))),
+                    BiopythonParserWarning,
+                )
+                plateID = _platesPrefix + "%02d" % abs(int(pID))
+            else:
+                if plateID.startswith(_platesPrefixMammalian):
+                    plateID = _platesPrefixMammalian + "%02d" % int(pID)
+                else:
+                    plateID = _platesPrefix + "%02d" % int(pID)
+
+        try:
+            times = pobj[_measurements][_hour]
+        except KeyError:
+            raise KeyError("Could not retrieve the time points")
+
+        plate = PlateRecord(plateID)
+
+        for k in pobj[_measurements]:
+            # Skip the time points
+            if k == _hour:
+                continue
+
+            plate[k] = WellRecord(
+                k,
+                plate=plate,
+                signals={
+                    times[i]: pobj[_measurements][k][i] for i in range(len(times))
+                },
+            )
+
+        # Remove the measurements and assign the other qualifiers
+        del pobj["measurements"]
+        plate.qualifiers = pobj
+
+        yield plate
+
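+# Example (hedged sketch): iterate over all plates in a PM JSON export:
+#
+#     with open("Plate.json") as handle:
+#         for plate in JsonIterator(handle):
+#             print(plate.id, len(plate))
+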
+
+def CsvIterator(handle):
+    """Iterate over PM csv records as PlateRecord objects.
+
+    Arguments:
+     - handle - input file
+
+    """
+    plate = None
+    data = False
+    qualifiers = {}
+    idx = {}
+    wells = {}
+
+    tblreader = csv.reader(handle, delimiter=",", quotechar='"')
+    for line in tblreader:
+        if len(line) < 2:
+            continue
+
+        elif _datafile in line[0].strip():
+            # Do we have a previous plate?
+            if plate is not None:
+                qualifiers[_csvData][_datafile] = line[1].strip()
+                plate = PlateRecord(plate.id)
+                for k, v in wells.items():
+                    plate[k] = WellRecord(k, plate, v)
+                plate.qualifiers = qualifiers
+                yield plate
+            plate = PlateRecord(None)
+            data = False
+            qualifiers[_csvData] = {}
+            idx = {}
+            wells = {}
+
+        elif _plate in line[0].strip():
+            plateID = line[1].strip()
+
+            qualifiers[_csvData][_plate] = plateID
+
+            # Parse also non-standard plate IDs
+            if not plateID.startswith(_platesPrefix) and not plateID.startswith(
+                _platesPrefixMammalian
+            ):
+                warnings.warn(
+                    "Non-standard plate ID found (%s)" % plateID, BiopythonParserWarning
+                )
+            else:
+                # Simplify the plate IDs, removing letters, as opm does
+                if plateID.startswith(_platesPrefixMammalian):
+                    pID = plateID[len(_platesPrefixMammalian) :]
+                else:
+                    pID = plateID[len(_platesPrefix) :]
+                while len(pID) > 0:
+                    try:
+                        int(pID)
+                        break
+                    except ValueError:
+                        pID = pID[:-1]
+
+                # No luck
+                if len(pID) == 0:
+                    warnings.warn(
+                        "Non-standard plate ID found (%s)" % plateID,
+                        BiopythonParserWarning,
+                    )
+                elif int(pID) < 0:
+                    warnings.warn(
+                        "Non-standard plate ID found (%s), using %s"
+                        % (plateID, _platesPrefix + "%02d" % abs(int(pID))),
+                        BiopythonParserWarning,
+                    )
+                    plateID = _platesPrefix + "%02d" % abs(int(pID))
+                else:
+                    if plateID.startswith(_platesPrefixMammalian):
+                        plateID = _platesPrefixMammalian + "%02d" % int(pID)
+                    else:
+                        plateID = _platesPrefix + "%02d" % int(pID)
+
+            plate.id = plateID
+
+        elif _strainType in line[0].strip():
+            if plate is None:
+                continue
+            qualifiers[_csvData][_strainType] = line[1].strip()
+
+        elif _sample in line[0].strip():
+            if plate is None:
+                continue
+            qualifiers[_csvData][_sample] = line[1].strip()
+
+        elif _strainNumber in line[0].strip():
+            if plate is None:
+                continue
+            qualifiers[_csvData][_strainNumber] = line[1].strip()
+
+        elif _strainName in line[0].strip():
+            if plate is None:
+                continue
+            qualifiers[_csvData][_strainName] = line[1].strip()
+
+        elif _other in line[0].strip():
+            if plate is None:
+                continue
+            qualifiers[_csvData][_other] = line[1].strip()
+
+        elif _file in line[0].strip():
+            if plate is None:
+                continue
+            qualifiers[_csvData][_file] = line[1].strip()
+
+        elif _position in line[0].strip():
+            if plate is None:
+                continue
+            qualifiers[_csvData][_position] = line[1].strip()
+
+        elif _setupTime in line[0].strip():
+            if plate is None:
+                continue
+            qualifiers[_csvData][_setupTime] = line[1].strip()
+
+        elif _hour in line[0].strip():
+            if plate is None:
+                continue
+            data = True
+            for i in range(1, len(line)):
+                x = line[i]
+                if x == "":
+                    continue
+                wells[x.strip()] = {}
+                idx[i] = x.strip()
+
+        elif data:
+            if plate is None:
+                continue
+
+            # Workaround for bad-formatted files
+            try:
+                float(line[0])
+            except ValueError:
+                continue
+
+            time = float(line[0])
+
+            for i in range(1, len(line)):
+                x = line[i]
+
+                try:
+                    signal = float(x)
+                except ValueError:
+                    continue
+
+                well = idx[i]
+                wells[well][time] = signal
+
+    if plate is not None and plate.id is not None:
+        plate = PlateRecord(plate.id)
+        for k, v in wells.items():
+            plate[k] = WellRecord(k, plate, v)
+        plate.qualifiers = qualifiers
+        yield plate
+
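+# Example (hedged sketch): iterate over all plates in a PM CSV export:
+#
+#     with open("Plates.csv") as handle:
+#         for plate in CsvIterator(handle):
+#             print(plate.id, len(plate))
+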
+
+def _toOPM(plate):
+    """Transform a PlateRecord object into a dictionary (PRIVATE)."""
+    d = dict(plate.qualifiers.items())
+
+    d[_csvData] = {}
+    d[_csvData][_plate] = plate.id
+    d[_measurements] = {}
+    d[_measurements][_hour] = []
+    times = set()
+    for wid, w in plate._wells.items():
+        d[_measurements][wid] = []
+        for hour in w._signals:
+            times.add(hour)
+
+    for hour in sorted(times):
+        d[_measurements][_hour].append(hour)
+        for wid, w in plate._wells.items():
+            if hour in w._signals:
+                d[_measurements][wid].append(w[hour])
+            # This shouldn't happen
+            else:
+                d[_measurements][wid].append(float("nan"))
+
+    return d
+
+
+class JsonWriter:
+    """Class to write PM Json format files."""
+
+    def __init__(self, plates):
+        """Initialize the class."""
+        self.plates = plates
+
+    def write(self, handle):
+        """Write this instance's plates to a file handle."""
+        out = []
+        for plate in self.plates:
+            try:
+                out.append(_toOPM(plate))
+            except ValueError:
+                raise ValueError("Could not export plate(s) in JSON format")
+
+        handle.write(json.dumps(out) + "\n")
+
+        return len(out)
+
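+# Example (hedged sketch): serialize one or more plates back to JSON,
+# assuming a PlateRecord "plate" obtained as in the examples above:
+#
+#     with open("plates.json", "w") as handle:
+#         n = JsonWriter([plate]).write(handle)  # returns the number of plates written
+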
+
+if __name__ == "__main__":
+    from Bio._utils import run_doctest
+
+    run_doctest(verbose=0)
diff --git a/code/lib/Bio/phenotype/pm_fitting.py b/code/lib/Bio/phenotype/pm_fitting.py
new file mode 100644
index 0000000..db8ac12
--- /dev/null
+++ b/code/lib/Bio/phenotype/pm_fitting.py
@@ -0,0 +1,146 @@
+# Copyright 2014-2016 by Marco Galardini.  All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Growth curves fitting and parameters extraction for phenotype data.
+
+This module provides functions to perform sigmoid functions fitting to
+Phenotype Microarray data. This module depends on SciPy's curve_fit function.
+If SciPy is not available, a MissingPythonDependencyError is raised on import.
+
+Functions:
+logistic           Logistic growth model.
+gompertz           Gompertz growth model.
+richards           Richards growth model.
+guess_plateau      Guess the plateau point to improve sigmoid fitting.
+guess_lag          Guess the lag point to improve sigmoid fitting.
+fit                Sigmoid functions fit.
+get_area           Calculate the area under the PM curve.
+"""
+
+import numpy as np
+
+try:
+    from scipy.optimize import curve_fit
+    from scipy.integrate import trapz
+except ImportError:
+    from Bio import MissingPythonDependencyError
+
+    raise MissingPythonDependencyError("Install scipy to extract curve parameters.")
+
+
+def logistic(x, A, u, d, v, y0):
+    """Logistic growth model.
+
+    Proposed in Zwietering et al., 1990 (PMID: 16348228)
+    """
+    y = (A / (1 + np.exp((((4 * u) / A) * (d - x)) + 2))) + y0
+    return y
+
+
+def gompertz(x, A, u, d, v, y0):
+    """Gompertz growth model.
+
+    Proposed in Zwietering et al., 1990 (PMID: 16348228)
+    """
+    y = (A * np.exp(-np.exp((((u * np.e) / A) * (d - x)) + 1))) + y0
+    return y
+
+
+def richards(x, A, u, d, v, y0):
+    """Richards growth model (equivalent to Stannard).
+
+    Proposed in Zwietering et al., 1990 (PMID: 16348228)
+    """
+    y = (
+        A
+        * pow(
+            1
+            + (
+                v
+                + (np.exp(1 + v) * np.exp((u / A) * (1 + v) * (1 + (1 / v)) * (d - x)))
+            ),
+            -(1 / v),
+        )
+    ) + y0
+    return y
+
+
+def guess_lag(x, y):
+    """Given two axes returns a guess of the lag point.
+
+    The lag point is defined as the x point where the difference in y
+    with the next point is higher then the mean differences between
+    the points plus one standard deviation. If such point is not found
+    or x and y have different lengths the function returns zero.
+    """
+    if len(x) != len(y):
+        return 0
+
+    diffs = []
+    indexes = range(len(x))
+
+    for i in indexes:
+        if i + 1 not in indexes:
+            continue
+        diffs.append(y[i + 1] - y[i])
+    diffs = np.array(diffs)
+
+    flex = x[-1]
+    for i in indexes:
+        if i + 1 not in indexes:
+            continue
+        if (y[i + 1] - y[i]) > (diffs.mean() + (diffs.std())):
+            flex = x[i]
+            break
+
+    return flex
+
+
+def guess_plateau(x, y):
+    """Given two axes returns a guess of the plateau point.
+
+    The plateau point is defined as the x point where the y point
+    is near one standard deviation of the differences between the y points to
+    the maximum y value. If such point is not found or x and y have
+    different lengths the function returns zero.
+    """
+    if len(x) != len(y):
+        return 0
+
+    diffs = []
+    indexes = range(len(y))
+
+    for i in indexes:
+        if i + 1 not in indexes:
+            continue
+        diffs.append(y[i + 1] - y[i])
+    diffs = np.array(diffs)
+
+    ymax = y[-1]
+    for i in indexes:
+        if y[i] > (ymax - diffs.std()) and y[i] < (ymax + diffs.std()):
+            ymax = y[i]
+            break
+
+    return ymax
+
+
+def fit(function, x, y):
+    """Fit the provided function to the x and y values.
+
+    Returns the function parameters and the parameter covariance.
+    """
+    # Compute guesses for the parameters
+    # This is necessary to get significant fits
+    p0 = [guess_plateau(x, y), 4.0, guess_lag(x, y), 0.1, min(y)]
+
+    params, pcov = curve_fit(function, x, y, p0=p0)
+    return params, pcov
+
+
+def get_area(y, x):
+    """Get the area under the curve."""
+    return trapz(y=y, x=x)
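+
+
+# A minimal usage sketch (hedged; synthetic data, assumes SciPy is available):
+#
+#     import numpy as np
+#     x = np.arange(0, 48, 0.25)                  # time points (hours)
+#     y = logistic(x, 100.0, 4.0, 5.0, 0.1, 0.0)  # synthetic logistic curve
+#     params, pcov = fit(logistic, x, y)          # recover (A, u, d, v, y0)
+#     area = get_area(y, x)                       # area under the curve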
diff --git a/code/lib/Building_Literature_Embedding_Model.py b/code/lib/Building_Literature_Embedding_Model.py
new file mode 100644
index 0000000..06f27d6
--- /dev/null
+++ b/code/lib/Building_Literature_Embedding_Model.py
@@ -0,0 +1,303 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Aug 29 20:18:05 2022
+
+@author: Jihye Moon
+"""
+import os 
+import numpy as np
+import math
+import pathlib
+from Moon_gene2vec import Gene2vec
+import tensorflow as tf
+from tensorflow.contrib.tensorboard.plugins import projector
+
+class building_embedding_model():
+    def __init__(self):
+        return None
+    
+    def setting(self,preprocessed_path, vocab_dir, logs_dir, gene2doc_dir, baseline_doc_dir):
+        
+        self.baseline_doc_dir=baseline_doc_dir
+        self.logs_dir=logs_dir
+        self.gene2doc_dir=gene2doc_dir
+        self.vocab_dir=vocab_dir
+        
+        self.gene2document =  os.path.join(preprocessed_path, 'gene2document.data.doc.txt')
+        self.baseline_doc =  os.path.join(preprocessed_path, 'baseline_doc.data.doc.txt')
+
+        return None
+    
+    def dumpArrayFile(self,denseList, path, name):
+        np.asarray(denseList).dump(os.path.join(path, name+'.dat'))
+
+    def creating_vocab(self, min_count = 5, min_size = 2): 
+        g2v = Gene2vec()
+        gene2doc, gene1 = g2v.data_loading(self.gene2document)
+        baseline_doc, gene2 = g2v.data_loading(self.baseline_doc)
+        
+        vocabulary = g2v.vocab_output()
+        
+        removed_voc = g2v.selecting_vocab(vocabulary, min_count, min_size=min_size)
+        gene_dict, gene_reverse_dict = g2v.gene_dic(gene1, removed_voc)
+         
+        g2v.vocab_save('excluded_sum'+str(min_count)+'two2doc', gene_dict, self.vocab_dir)
+           
+        self.gene2doc=gene2doc
+        self.gene1=gene1
+        self.baseline_doc=baseline_doc
+        self.gene_dict=gene_dict 
+        self.gene_reverse_dict=gene_reverse_dict
+        self.g2v=g2v
+        self.vocabulary_size = len(self.gene_reverse_dict)
+        
+    def creating_training_data_for_gene2doc(self, window_size):
+        g2v=self.g2v
+        window_size=window_size
+        gene1=self.gene1 
+        gene_dict=self.gene_dict
+        buf_batch=[]; buf_labels=[] 
+        
+        countData=0; indexing=0
+        
+        for i in range(len(self.gene2doc)):
+            save_batch, save_labels = g2v.gene2doc_batch_fucntion(self.gene2doc[i], gene1[i], i, window_size) 
+            save_batch, save_labels = g2v.gene_insert(save_batch,save_labels, gene_dict[gene1[i]]) 
+            save_batch3, save_labels3 = g2v.gene_additing(self.gene2doc[i], gene_dict[gene1[i]], i, window_size)
+            save_batch=np.concatenate((save_batch,save_batch3))
+            save_labels=np.concatenate((save_labels,save_labels3)) 
+                
+            buf_batch.extend(save_batch)
+            buf_labels.extend(save_labels) 
+            if countData==1000: 
+                self.dumpArrayFile(buf_batch, self.gene2doc_dir, 'batch.'+str(indexing))
+                self.dumpArrayFile(buf_labels, self.gene2doc_dir, 'label.'+str(indexing))
+                countData=0
+                buf_batch=[]; buf_labels=[]
+                indexing+=1
+            countData+=1
+        self.dumpArrayFile(buf_batch, self.gene2doc_dir, 'batch.'+str(indexing))
+        self.dumpArrayFile(buf_labels, self.gene2doc_dir, 'label.'+str(indexing)) 
+        del self.gene2doc
+
+    def checking_gene2doc_generation(self, window_size):
+        g2v=self.g2v
+        window_size=window_size
+        gene1=self.gene1 
+        gene_reverse_dict=self.gene_reverse_dict
+        
+        for i in range(1):
+            print("== Examples: ", gene1[i])
+            save_batch, save_labels = g2v.gene2doc_batch_fucntion(self.gene2doc[i], gene1[i], i, window_size) 
+            save_batch, save_labels = g2v.gene_insert(save_batch,save_labels, self.gene_dict[gene1[i]]) 
+            save_batch3, save_labels3 = g2v.gene_additing(self.gene2doc[i], self.gene_dict[gene1[i]], i, window_size)
+            save_batch=np.concatenate((save_batch,save_batch3)) 
+            save_labels=np.concatenate((save_labels,save_labels3))  
+        print("============================== Fig. 3(a) in the published paper ")
+        for k in range(30): 
+            print(gene_reverse_dict[save_batch[k]], '->' , gene_reverse_dict[save_labels[k]])
+        print("============================== Fig. 3(b) in the published paper")
+        for n in range(30): 
+            k=len(save_batch)-1-n
+            print(gene_reverse_dict[save_batch[k]], '->' , gene_reverse_dict[save_labels[k]]) 
+        
+    def creating_training_data_for_word2doc(self, window_size):
+        g2v=self.g2v
+        buf_batch=[]
+        buf_labels=[] 
+        window_size=window_size
+        countData=0; indexing=0
+        for i in range(len(self.baseline_doc)):
+            save_batch, save_labels = g2v.gene2doc_batch_fucntion(self.baseline_doc[i], 0, i, window_size)  
+            buf_batch.extend(save_batch)
+            buf_labels.extend(save_labels) 
+            if countData==50000:
+                print(i, len(self.baseline_doc))
+                self.dumpArrayFile(buf_batch, self.baseline_doc_dir, 'batch.'+str(indexing))
+                self.dumpArrayFile(buf_labels, self.baseline_doc_dir, 'label.'+str(indexing))
+                countData=0
+                buf_batch=[]; buf_labels=[]
+                indexing+=1
+            countData+=1
+        self.dumpArrayFile(buf_batch, self.baseline_doc_dir, 'batch.'+str(indexing))
+        self.dumpArrayFile(buf_labels, self.baseline_doc_dir, 'label.'+str(indexing)) 
+    
+    def model_setting(self, dimension, num_sampled):
+        self.vocabulary_size = len(self.gene_reverse_dict)
+        self.dimension = dimension
+        self.num_sampled = num_sampled
+            
+    def sorting_data_loading(self, data): 
+        batch=[]
+        label=[]
+        full_size=int(len(data)/2)
+        for i in range(full_size):
+            batch.append('batch.'+str(i)+'.dat')
+            label.append('label.'+str(i)+'.dat')
+        return batch, label
+    
+    def logs(self, name, word):
+        f = open(name+'_logs.txt','a') 
+        f.write('{}\n'.format(word))
+        f.close()
+        
+    def starting_sorting(self, model_path):
+        import argparse
+        print('starting making data')
+        logs_dir=self.logs_dir
+        parser = argparse.ArgumentParser()
+        parser.add_argument(
+            '--log_dir',
+            type=str,
+            default=model_path,
+            help='The log directory for TensorBoard summaries.')
+
+        FLAGS, unparsed = parser.parse_known_args()
+        self.FLAGS = FLAGS
+        if not os.path.exists(FLAGS.log_dir):
+            os.makedirs(FLAGS.log_dir)
+        dir_names = os.listdir(logs_dir)
+        batch_list_dir=[]; target_list_dir=[]
+        for i in range(len(dir_names)):
+            if '.txt' not in dir_names[i]: 
+                print(i, dir_names[i])
+                data_dir = os.path.join(logs_dir, dir_names[i]) 
+                result = os.listdir(data_dir)
+                
+                batch_rd, label_rd = self.sorting_data_loading(result)
+                for j in range(len(batch_rd)):
+                    if 'batch' in batch_rd[j]:
+                        batch_list_dir.append(os.path.join(data_dir, batch_rd[j]))
+                        self.logs(os.path.join(FLAGS.log_dir, 'batch_list'), os.path.join(data_dir, batch_rd[j]))
+                for j in range(len(label_rd)):
+                    if 'label' in label_rd[j]:
+                        target_list_dir.append(os.path.join(data_dir, label_rd[j]))
+                        self.logs(os.path.join(FLAGS.log_dir, 'target_list'), os.path.join(data_dir, label_rd[j]))
+            self.target_list_dir=target_list_dir
+            self.batch_list_dir=batch_list_dir
+        
+    def batch(self, X, y, batch_size, name='batch'): 
+        n_size=len(X)
+        rd_idx = np.random.permutation(n_size) 
+        n_batches = n_size // batch_size
+        for idx in np.array_split(rd_idx, n_batches):
+            X_batch, y_batch = X[idx], y[idx]
+            yield X_batch, y_batch
+             
+    def model_training(self, epoch=10, batch_size=256): 
+        all_size = len(self.batch_list_dir)
+        vocabulary_size=self.vocabulary_size
+        dimension=self.dimension
+        num_sampled=self.num_sampled
+        
+        valid_size = 16   
+        valid_window = 100  
+        valid_examples = np.random.choice(valid_window, valid_size, replace=False)
+        num_steps = epoch
+        graph = tf.Graph()
+        
+        with graph.as_default(): 
+          # Input data.
+          with tf.name_scope('inputs'):
+            train_inputs = tf.placeholder(tf.int32, shape=[None])
+            train_labels = tf.placeholder(tf.int32, shape=[None, 1])
+            valid_dataset = tf.constant(valid_examples, dtype=tf.int32)
+        
+          with tf.device('/cpu:0'):
+            nce_weights = tf.Variable(tf.truncated_normal([vocabulary_size, dimension],
+                                            stddev=1.0 / math.sqrt(dimension)), name='nce_w')
+            nce_biases = tf.Variable(tf.zeros([vocabulary_size]), name='nce_b')
+            embeddings = tf.Variable(tf.random_uniform([vocabulary_size, dimension], -1.0, 1.0), name='embed1') 
+            embed = tf.nn.embedding_lookup(embeddings, train_inputs, name='lookup')
+        
+          with tf.name_scope('loss'):
+            loss = tf.reduce_mean(
+                tf.nn.nce_loss(
+                    weights=nce_weights, 
+                    biases=nce_biases,
+                    labels=train_labels,
+                    inputs=embed,
+                    num_sampled=num_sampled,
+                    num_classes=vocabulary_size))
+            
+          tf.summary.scalar('loss', loss)
+          with tf.name_scope('optimizer'):
+            optimizer = tf.train.GradientDescentOptimizer(1.0).minimize(loss)
+        
+          norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keep_dims=True))
+          normalized_embeddings = embeddings / norm
+          valid_embeddings = tf.nn.embedding_lookup(normalized_embeddings, valid_dataset)
+          similarity = tf.matmul(
+              valid_embeddings, normalized_embeddings, transpose_b=True)
+        
+          merged = tf.summary.merge_all()
+          init = tf.global_variables_initializer()
+          saver = tf.train.Saver()
+        
+        savedloss=[]
+        with tf.Session(graph=graph) as session:
+            writer = tf.summary.FileWriter(self.FLAGS.log_dir, session.graph)
+            init.run()  
+                    
+            print('Initialized') 
+            average_loss = 0;
+            counting=0; total_counting=0;
+            with open(self.FLAGS.log_dir + '/metadata.tsv', 'w', encoding='UTF-8') as f:
+                for i in range(vocabulary_size):
+                    f.write(self.gene_reverse_dict[i] + '\n')
+            for step in range(num_steps): 
+                rd_idx = np.arange(all_size)
+                np.random.shuffle(rd_idx)
+                for rn in rd_idx:  
+                    batch_dir = self.batch_list_dir[rn]
+                    label_dir = self.target_list_dir[rn] 
+                    target=np.load(batch_dir, allow_pickle=True)
+                    label=np.load(label_dir, allow_pickle=True)
+                    loading_target=target
+                    loading_label=label
+                    full_size=len(loading_label) 
+                    if step % full_size==0:
+                        full_size=len(loading_label)
+                        rd = np.arange(full_size)
+                        np.random.shuffle(rd) 
+                        # shuffle labels and targets with the same permutation
+                        loading_label=loading_label[rd]
+                        loading_target=loading_target[rd]
+                    for X_batch, y_batch in self.batch(X= target, y=label, batch_size = batch_size): 
+                        y_batch=y_batch.reshape(-1,1)
+                        feed_dict = {train_inputs: X_batch, train_labels: y_batch} 
+                        run_metadata = tf.RunMetadata() 
+                        _, summary, loss_val = session.run(
+                            [optimizer, merged, loss],
+                            feed_dict=feed_dict,
+                            run_metadata=run_metadata)
+                        average_loss += loss_val
+                        counting+=1; total_counting+=1;
+                        writer.add_summary(summary, step)
+                writer.add_run_metadata(run_metadata, 'step%d' % step)
+                average_loss /= counting
+                print('Average loss at step ', step, '/', num_steps, ': ', average_loss) 
+                self.logs(os.path.join(self.FLAGS.log_dir, '128d'), str(step)+' '+str(average_loss))
+                savedloss.append(average_loss)
+                average_loss = 0
+                counting=0 
+                saver.save(session, os.path.join(self.FLAGS.log_dir, 'mid_model.ckpt')) 
+                self.dumpArrayFile(self.gene_reverse_dict, self.FLAGS.log_dir, 'name')  
+                sim = similarity.eval()
+                for i in range(valid_size):
+                    valid_word = self.gene_reverse_dict[valid_examples[i]]
+                    top_k = 8  # number of nearest neighbors
+                    nearest = (-sim[i, :]).argsort()[1:top_k + 1]
+                    log_str = 'Nearest to %s:' % valid_word
+                    for k in range(top_k):
+                      close_word = self.gene_reverse_dict[nearest[k]]
+                      log_str = '%s %s,' % (log_str, close_word)
+                    print(log_str)
+            config = projector.ProjectorConfig()
+            embedding_conf = config.embeddings.add()
+            embedding_conf.tensor_name = embeddings.name
+            embedding_conf.metadata_path = os.path.join(self.FLAGS.log_dir, 'metadata.tsv') 
+            
+            saver.save(session, os.path.join(self.FLAGS.log_dir, 'model.ckpt'))
+          
+        writer.close() 
\ No newline at end of file
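+
+# A minimal end-to-end sketch (hedged; the directory and path names below are
+# placeholders, not part of the original code):
+#
+#     bem = building_embedding_model()
+#     bem.setting(preproc_dir, vocab_dir, logs_dir, gene2doc_dir, baseline_dir)
+#     bem.creating_vocab(min_count=5, min_size=2)
+#     bem.creating_training_data_for_gene2doc(window_size=2)
+#     bem.creating_training_data_for_word2doc(window_size=2)
+#     bem.model_setting(dimension=128, num_sampled=64)
+#     bem.starting_sorting(model_path='model/')
+#     bem.model_training(epoch=10, batch_size=256)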
diff --git a/code/lib/CVD_risk_factor_search.py b/code/lib/CVD_risk_factor_search.py
new file mode 100644
index 0000000..8e0fc1f
--- /dev/null
+++ b/code/lib/CVD_risk_factor_search.py
@@ -0,0 +1,19 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Aug  3 13:57:25 2018
+
+@author: Jihye Moon
+"""
+
+class run_intrisic_evaluation():
+    def __init__(self):
+        return None
+    def setting(self, path, gene_symb):
+        import loading_literature_embedding as emb
+    
+        emb2simi=emb.embedding_vector()  
+        words_list, index2word, syn0norm, syn1norm = emb2simi.setting(path, gene_symb)
+        self.emb2simi=emb2simi
+    def running(self, query, output_path, Top_Words):
+        self.emb2simi.similarity_display(query, output_path, Top_Words)
+    
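+# A minimal usage sketch (hedged; the paths and query below are placeholders):
+#
+#     searcher = run_intrisic_evaluation()
+#     searcher.setting('path/to/model', 'path/to/gene_symbols')
+#     searcher.running('hypertension', 'results/', 10)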
diff --git a/code/lib/ExpCohort_Generator.py b/code/lib/ExpCohort_Generator.py
new file mode 100644
index 0000000..603acf8
--- /dev/null
+++ b/code/lib/ExpCohort_Generator.py
@@ -0,0 +1,53 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Sep  7 14:10:26 2022
+
+@author: moon
+"""
+
+import pandas as pd 
+import pathlib
+import random
+import Literature_Data_Preprocessing as ldpl
+feature_symbol = ['oca','bca','nit','fhha', 'sbld', 'pulrate']
+feature_name = ['Other cancer','Breast cancer','Nitrates','Family history of heart attack','Systolic Blood Pressure (from Waveform Analysis), (mmHg)','Pulse Rate (from Waveform Analysis), (beats/minute)']
+label_symbol=['mi','rca','ang','ptca','cbg']
+label_name = ['Myocardial Infarction (MI)', 'Resuscitated Cardiac Arrest', 'Angina Pectoris', 'Percutaneous Transluminal Coronary Angioplasty (PTCA)', 'Coronary Bypass Graft']  # aligned one-to-one with label_symbol
+
+
+subject_number = 200
+X=[]
+y=[]
+for n in range(subject_number):
+    buffer=[]
+    for i in range(len(feature_name)):
+        buffer.append(random.random())
+    X.append(buffer)
+    y.append([random.randint(0, 1)])
+Xt = pd.DataFrame(X, columns=feature_symbol)
+y = pd.DataFrame(y) 
+
+
+ldp=ldpl.preprocessing('', '', '', '') 
+variables_indexing = {}
+disease_variables_indexing = {}
+
+for i in range(len(feature_name)):  
+    buffer = ldp.sentence_preprocessor(feature_name[i]) 
+    variables_indexing[feature_symbol[i]] = buffer 
+    
+for i in range(len(label_name)):  
+    buffer = ldp.sentence_preprocessor(label_name[i]) 
+    disease_variables_indexing[label_symbol[i]] = buffer 
+    
+example_path='../../data/Example/'
+pathlib.Path(example_path).mkdir(parents=True, exist_ok=True)
+Xt.to_csv(example_path+'Example_X.csv')
+y.to_csv(example_path+'Example_y.csv')
+
+pd.DataFrame(variables_indexing.values()).to_csv(example_path+'variables_preprocessed_names.csv')
+pd.DataFrame(variables_indexing.keys()).to_csv(example_path+'variables_symbol.csv')
+
+pd.DataFrame(disease_variables_indexing.values()).to_csv(example_path+'target_variables_preprocessed_names.csv')
+pd.DataFrame(disease_variables_indexing.keys()).to_csv(example_path+'target_variables_symbol.csv')
diff --git a/code/lib/Literature_Data_Collection.py b/code/lib/Literature_Data_Collection.py
new file mode 100644
index 0000000..ec7993e
--- /dev/null
+++ b/code/lib/Literature_Data_Collection.py
@@ -0,0 +1,117 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Aug 29 17:28:57 2022
+
+@author: Jihye Moon
+"""
+
+import os
+import time
+from lib.Loading_PudMed import ids_pudmed as pudmed
+
+class literature_data_collection():
+    def __init__(self, email, output_dir, document_output_dir, api_key=None):
+        self.output_dir=output_dir
+        self.document_output_dir=document_output_dir
+        self.email = email 
+        self.api_key = api_key
+
+    def text_open(self, path):
+        with open(path, 'r') as f:
+            data=f.read().strip().split('\n')
+        return data
+    
+    def data_split(self, key):
+        return key.split('#')
+    
+    def word_based_query_fit(self, year = None, user_term="heart"):
+        email = self.email
+        pud = pudmed() 
+        search_results, end_point = pud.search_list(user_term, year, email) 
+        return search_results, end_point 
+    
+    def collecting_doc_using_word_based_query(self, year = None, user_term="heart", gap = 50000, starting = 0, ixs = 0, test_end_point=0):
+        email = self.email
+        pud = pudmed() 
+        search_results, end_point = pud.search_list(user_term, year, email) 
+        batch = 10000
+        
+        if test_end_point != 0:
+            end_point = test_end_point # Test 
+            print('Checking data collection performance --- collecting until ',end_point,' documents')
+            
+        counting = round(end_point/gap)
+        ending = starting + gap
+
+        for ix in range(ixs, counting):
+            if ix == counting-1:
+                ending = end_point  # clip the final chunk to the true record count
+            print(ix, '/', counting-1, ' | from ', starting, ' to ', ending)
+            pud.search_full(ix, self.output_dir, search_results, starting, ending, batch)
+            starting = ending
+            ending = starting + gap
+            time.sleep(1)
+                
+    def collecting_doc_using_word_based_query2(self, batch = 10000, gap = 50000, starting = 0, ixs = 0, search_results={}, end_point=0):
+
+        pud = pudmed() 
+        
+        counting = round(end_point/gap)
+        ending = starting + gap
+
+        for ix in range(ixs, counting):
+            if ix == counting-1:
+                ending = end_point  # clip the final chunk to the true record count
+            print(ix, '/', counting-1, ' | from ', starting, ' to ', ending)
+            pud.search_full(ix, self.output_dir, search_results, starting, ending, batch)
+            starting = ending
+            ending = starting + gap
+    
+    def gene_based_query_fit(self, query_len, query_full, query_symbol):
+        self.query_len=query_len
+        self.query_symbol=query_symbol
+        self.query_full=query_full
+    
+    def collecting_doc_using_gene_based_query(self, year = None, batch_size = 10, starting = 0, query_len = 26335, end_point = 2634):
+        document_output_dir=self.document_output_dir
+        counting=starting*batch_size
+        query_len=self.query_len
+        query_symbol=self.query_symbol
+        query_full=self.query_full
+        email = self.email
+        pud = pudmed() 
+        
+        for i in range(starting, end_point+1): 
+            handle2 = open(os.path.join(document_output_dir, "FullText_symbol."+str(i)+".txt"), "w", encoding='utf-8')
+            handle_excluding2 = open(os.path.join(document_output_dir, "excluded_symbol."+str(i)+".txt"), "w", encoding='utf-8')
+            handle_meta2 = open(os.path.join(document_output_dir, "meta_symbol."+str(i)+".txt"), "w", encoding='utf-8') 
+            print('Collecting Gene2doc ',i , '/', end_point)
+            for j in range(batch_size):
+                if counting>=query_len-1:
+                    break
+                time.sleep(5)
+                LR2, FullText2, meta2 = pud.search_gene2doc(query_symbol[counting], email)
+         
+                if LR2!=[]:
+                    indexing2 = str(counting)+'\t'+query_symbol[counting]+'\t'+query_full[counting]+FullText2
+                    handle2.write(indexing2)
+                    handle_meta2.write(str(counting)+'\t'+query_symbol[counting]+'\t'+query_full[counting]+meta2)
+                else: 
+                    handle_excluding2.write(query_symbol[counting]+'\t'+query_full[counting]+'\n')
+                counting += 1 
+            handle_excluding2.close()
+            handle_meta2.close()
+            handle2.close() 
+ 
\ No newline at end of file
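+# --- Usage sketch (illustrative; e-mail and directories are placeholders) ---
+# ldc = literature_data_collection('user@example.com', output_dir, document_output_dir)
+# search_results, total = ldc.word_based_query_fit(year=None, user_term='heart')
+# ldc.collecting_doc_using_word_based_query(year=None, user_term='heart',
+#                                           gap=50000, starting=0, ixs=0)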
diff --git a/code/lib/Literature_Data_Preprocessing.py b/code/lib/Literature_Data_Preprocessing.py
new file mode 100644
index 0000000..0ff733f
--- /dev/null
+++ b/code/lib/Literature_Data_Preprocessing.py
@@ -0,0 +1,347 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Sun Jun 21 00:16:25 2020
+
+@author: Jihye Moon
+
+"""
+
+import os
+import nltk
+from nltk.tokenize import word_tokenize
+from nltk.stem import PorterStemmer
+from io import StringIO
+try:
+    from sklearn.feature_extraction import stop_words  # scikit-learn < 0.24
+except ImportError:
+    # the stop-word module was made private in newer scikit-learn releases
+    from sklearn.feature_extraction import _stop_words as stop_words
+import re 
+nltk.download('punkt')
+nltk.download('averaged_perceptron_tagger')
+import Medline
+
+cachedStopWords = stop_words.ENGLISH_STOP_WORDS
+
+class preprocessing():
+    def __init__(self, data_dir, batch_dir, final_dir, preprocessed_dir): 
+        self.final_dir=final_dir
+        self.preprocessed_dir=preprocessed_dir
+        self.batch_dir=batch_dir
+        return None
+     
+    def Indexing(self, name, word):
+        f = open(name+'.txt','w') 
+        for i in range(len(word)):
+            f.write('{}\n'.format(word[i])) 
+        f.close()
+    
+    def batch_data_matching(self, full_path, including_list):
+        try:
+            arr_list=[]
+            dir_names = os.listdir(full_path)
+            j = 0; file_names = []
+            for dir_name in dir_names:
+                if dir_name in including_list:
+    
+                    arr = []
+                    i = 0
+                    full_dir_name = os.path.join(full_path, dir_name)
+                    if (os.path.isdir(full_dir_name)!=True):
+                        continue
+                    text_file_names = os.listdir(full_dir_name)
+                
+                    for text_file_name in text_file_names:
+                        full_text_file_name = os.path.join(full_dir_name, text_file_name)
+                        ext = os.path.splitext(full_text_file_name)[-1]
+                        if ext == '.txt': 
+                            arr.insert(i, full_text_file_name)
+                            i = i+1 
+                    
+                    file_names.append(dir_name)
+                    arr_list.insert(j, arr)
+                    j = j + 1 
+            return file_names, arr_list
+        except PermissionError:
+            pass
+    
+    def file_detection(self, data, name, point):
+        predata=[]
+        for i in range(len(data)):
+            if name in data[i]:
+                predata.append(data[i]) 
+        missing=[];sorting=[]
+        list_sorting=[]
+        for i in range(len(predata)):
+            buffer=predata[i].split('\\')
+            buffer_num = int(buffer[len(buffer)-1].split('.')[point])
+            sorting.append(buffer_num)
+            list_sorting.append([buffer_num, predata[i]])
+        sorting.sort()
+        list_sorting.sort()
+        arranged_list=[]
+        for i in range(len(predata)):
+            arranged_list.append(list_sorting[i][1])
+            if sorting[i]!=i:
+                missing.append(i) 
+                break
+        return sorting, missing, arranged_list
+    
+    def combining_files(self, file_names, data_list, names, point):
+        arr_list={}
+        for i in range(len(file_names)):
+            #print(file_names[i]) 
+            sorting, missing, arranged_list = self.file_detection(data_list[i], names[i], point)
+            counting=0
+            extending=[]
+            if missing==[]:
+                for k in range(len(arranged_list)):
+                    with open(arranged_list[k], 'r') as f:
+                        data = f.read().strip()
+                        data = data.split('\n')
+                        if data!=['']:
+                            extending.extend(data)
+                    if counting==100:
+                        #print(k, '/', len(arranged_list))
+                        #print(data)
+                        counting=0
+                    counting+=1
+                arr_list[file_names[i]]=extending
+        return arr_list
+    
+    def combining_query2doc(self, file_names, data_list, names, point):
+        arr_list={}
+        for i in range(len(file_names)):
+            #print(file_names[i]) 
+            sorting, missing, arranged_list = self.file_detection(data_list[i], names[i], point)
+            counting=0
+            extending=[]
+            if missing==[]:
+                print("NONE MISSING")
+                for k in range(len(arranged_list)):
+                    with open(arranged_list[k], 'r') as f:
+                        data = f.read()
+                        data=data.split('\nPMID')
+                        full_data=[]
+                        for n in range(len(data)):
+                            if len(data[n])>0:
+                                full_data.append('\nPMID'+data[n])
+                        extending.extend(full_data)
+                    if counting==100:
+                        #print(k, '/', len(arranged_list))
+                        #print(data)
+                        counting=0
+                    counting+=1
+                arr_list[file_names[i]]=extending
+        return arr_list
+    
+    def Medine_mapping(self, data):  
+        LR=[]; TI=[]; AB=[]; MH=[]; RN=[]; PMID=[]; DCOM=[]
+        FullText=''; Meta=''
+        rec_file = StringIO(data)
+        medline_rec = Medline.read(rec_file)
+        if 'AB' in medline_rec:
+            if 'LR' in medline_rec:
+                LR.append(medline_rec['LR'])  
+            else:
+                LR.append('.')
+            if 'TI' in medline_rec:
+                TI.append(medline_rec['TI']) 
+            else:
+                TI.append('.')
+            if 'AB' in medline_rec:
+                AB.append(medline_rec['AB']) 
+            else:
+                AB.append('.')
+            if 'MH' in medline_rec:
+                MH.append(medline_rec['MH']) 
+            else:
+                MH.append('.')
+            if 'PMID' in medline_rec:
+                PMID.append(medline_rec['PMID']) 
+            else:
+                PMID.append('.') 
+            if 'DCOM' in medline_rec:
+                DCOM.append(medline_rec['DCOM']) 
+            else:
+                DCOM.append('.') 
+            if 'RN' in medline_rec:
+                RN.append(medline_rec['RN']) 
+            else:
+                RN.append('.') 
+                 
+        for i in range(len(AB)):
+            FullText += '#'+PMID[i]+'\t'+DCOM[i]+'\t'+LR[i]+'\t'+TI[i]+'\t'+AB[i] 
+            Meta += "\t@".join(RN[i])+'\t#'.join(MH[i])
+        FullText+='\n' 
+        Meta+='\n' 
+        return FullText, Meta
+    
+    def gene2doc_mapping(self, data_list):
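+        # Each entry of data_list is expected to look like (as built by
+        # collecting_doc_using_gene_based_query / search_gene2doc):
+        #   <idx>\t<gene_symbol>\t<full_name>\t#<PMID>\t<DCOM>\t<LR>\t<TI>\t<AB>\t#...
+        # so splitting on '\t#' yields a header chunk plus one chunk per article.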
+        gene2doc={}
+        total_size=len(data_list)
+        for i in range(total_size):
+            total_data=''
+            #print(i, '/', len(data_list), round(i/total_size,2)*100)
+            data = data_list[i].split('\t#')
+            gene = data[0].split('\t')[1]
+            data = data[1:len(data)]
+            if len(data)>=1:
+                for j in range(len(data)):        
+                    total_data += data[j].split('\t')[3] + ' ' + data[j].split('\t')[4] 
+            
+            if gene2doc.get(gene,-1) == -1:
+                gene2doc[gene] = total_data
+            else:
+                gene2doc[gene] += total_data + ' '
+        return gene2doc
+
+    def check_valid_word(self, word):
+        # keep a word only if it is not an English stop word
+        return word not in cachedStopWords
+        
+    def stem_word(self, word):
+        ps = PorterStemmer()
+        return ps.stem(word)
+    
+    def replace_all(self, text):
+        patterns= [r'[^\w\s]']
+        for p in patterns:
+            match= re.findall(p, text)
+            for m in match:
+                if m != '-': 
+                    text = text.replace(m, ' ') 
+        return text
+    
+    def replace_num(self, text):
+        patterns= ['[0-9]+']
+        for p in patterns:
+            match= re.findall(p, text)
+            for m in match:
+                if ' '+m+' ' in text: 
+                    text = text.replace(m, ' ')
+        return text
+    
+    def replace_single_num(self, text):
+        text = text.replace('-', '')
+        patterns= ['[0-9]+']
+        for p in patterns:
+            match= re.findall(p, text)
+        if len(match)>0:
+            if len(text) == len(match[0]):
+                single=0
+            else:
+                single=1
+        else:
+            single=1
+            
+        return single
+       
+    def removal_unwanted_pos(self, data):
+        # POS tags to drop: prepositions, determiners, pronouns, modals, comparatives, etc.
+        unwanted = ['IN', 'DT', 'PRP', 'PRP$', 'WRB', 'MD', 'TO', 'RBR', 'RBS', 'CC', 'EX']
+        text=nltk.pos_tag(word_tokenize(data))
+        results = ''
+        for txt, pos in text:
+            if pos not in unwanted:
+                results+=txt+' '
+        return results
+
+    def sentence_preprocessor(self, sentence, stem=False): 
+        sentence = self.removal_unwanted_pos(sentence)
+        sentence = sentence.lower()   
+        sentence = self.replace_all(sentence) 
+        sentence = sentence.replace('.', '. ')
+        sentence = re.sub('[0-9]+', '#', sentence) 
+    
+        new_sentence = "" 
+        words = sentence.split(' ')
+        for word in words: 
+            if stem:
+                word = self.stem_word(word)
+            if self.check_valid_word(word):
+                new_sentence += word + " "
+         
+        
+        new_sentence = new_sentence.replace(' - ', ' ') 
+        new_sentence = new_sentence.replace('- ', '-# ') 
+        new_sentence = new_sentence.replace(' -', ' #-') 
+        
+        new_sentence = new_sentence.replace(' -# ', ' ') 
+        new_sentence = new_sentence.replace(' #- ', ' ') 
+        new_sentence = new_sentence.replace(' -#- ', ' ') 
+        
+        new_sentence = new_sentence.replace(' # ', ' ') 
+        new_sentence = new_sentence.replace(' - ', ' ') 
+         
+        new_sentence = new_sentence + ' ' 
+        new_sentence = new_sentence.strip()
+    
+        return new_sentence
+    
+    def doc_preprocessor(self, sentence, stem=False): 
+        sentence = self.removal_unwanted_pos(sentence)
+        sentence = sentence.lower()   
+        sentence = self.replace_all(sentence) 
+        sentence = sentence.replace('.', '. ')
+        sentence = re.sub('[0-9]+', '#', sentence) 
+    
+        new_sentence = "" 
+        words = sentence.split(' ')
+        for word in words:
+            # optionally stem each word, then keep it only if it is not a stop word
+            if stem:
+                word = self.stem_word(word)
+            if self.check_valid_word(word):
+                new_sentence += word + " "
+         
+        new_sentence = new_sentence.replace(' - ', ' ') 
+        new_sentence = new_sentence.replace('- ', '-# ') 
+        new_sentence = new_sentence.replace(' -', ' #-') 
+        
+        new_sentence = new_sentence.replace(' -# ', ' ') 
+        new_sentence = new_sentence.replace(' #- ', ' ') 
+        new_sentence = new_sentence.replace(' -#- ', ' ') 
+        
+        new_sentence = new_sentence.replace(' # ', ' ') 
+        new_sentence = new_sentence.replace(' - ', ' ') 
+        
+        # remove uninformative words 
+        new_sentence = new_sentence + ' ' 
+        new_sentence = new_sentence.strip()
+    
+        return new_sentence
+    
+    
+    def making_doc_data(self, gene_list, name, dic):
+        preprocessed_dir=self.preprocessed_dir
+        counting=0
+        handle = open(os.path.join(preprocessed_dir, name+'.data.doc.txt'), "w")
+        if gene_list is None:
+            for i in range(len(dic)): 
+                if counting==10000:
+                    print(i, '/', len(dic))
+                    counting=0
+                buffer = dic[i].split('\t')
+                if buffer[0] != '\n':
+                    buffer = buffer[3] + buffer[4]
+                    if buffer != '':
+                        buffer = self.doc_preprocessor(buffer) 
+                        handle.write('-1' + '\t' + buffer + '\n')
+                counting+=1
+                
+        else:
+            for i in range(len(gene_list)): 
+                if counting==1000:
+                    print(i, '/', len(gene_list))
+                    counting=0
+                data = dic[gene_list[i]] 
+                buffer = self.doc_preprocessor(data)
+                if buffer != '':
+                    handle.write('#'+ gene_list[i] + '\t' + buffer + '\n')
+                counting+=1
+        handle.close()
+
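+# --- Usage sketch (illustrative; the sample sentence is an assumption) ----
+# ldp = preprocessing('', '', '', '')
+# ldp.sentence_preprocessor('Systolic Blood Pressure (from Waveform Analysis), (mmHg)')
+# # -> a lowercased, punctuation- and number-stripped string of content words;
+# #    the exact output depends on the installed NLTK tagger and stop-word list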
diff --git a/code/lib/Loading_PudMed.py b/code/lib/Loading_PudMed.py
new file mode 100644
index 0000000..43916a5
--- /dev/null
+++ b/code/lib/Loading_PudMed.py
@@ -0,0 +1,229 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Jun 10 00:25:50 2020
+
+@author: Jihye Moon
+
+"""
+import sys
+from lib.Bio import Entrez
+import lib.Bio
+from datetime import datetime
+from io import StringIO
+import time
+sys.path.append('lib')
+
+import lib.Medline
+import os
+
+date = startTime = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
+
+class ids_pudmed():
+    def __init__(self, snp_ids=[]):
+        self.snp_ids=snp_ids
+        self.uids=[]
+        self.gene_names=[]
+        self.names=[]
+        self.records=[]
+        self.gene_full_names=[]
+        self.saved_snp_id=[]
+        
+    def search_ids(self, search_email):
+        removal_index=[]
+        Entrez.email = search_email
+        records=[]
+        for snp_id in self.snp_ids:
+            record = Entrez.read(Entrez.elink(dbfrom="snp", 
+                                  id=snp_id.replace('rs',''), 
+                                  db="gene")) 
+            if record[0]['LinkSetDb']==[]:
+                removal_index.append(snp_id)
+                print("index is removed: ", snp_id)
+                
+            else:
+                results = record[0]['LinkSetDb'][0]['Link']
+                multi_gene=[]
+                multi_full_name=[]
+                multi_uid=[]
+                #records=[]
+                for result in results:
+                    uid = result['Id']
+                    handle = Entrez.esummary(db="gene", id=uid)
+                    uid_record = Entrez.read(handle)
+                    
+                    records.append(uid_record)
+                    handle.close()
+                    uid_summary = uid_record["DocumentSummarySet"]['DocumentSummary'][0]
+                    gene_name = uid_summary['Name']
+                    gene_full_name = uid_summary['Description']
+                    if len(results)>1:
+                        multi_gene.append(gene_name)
+                        multi_full_name.append(gene_full_name)
+                        multi_uid.append(uid)
+                        
+                        #records.append(uid_record)
+                    else:
+                        multi_gene = gene_name
+                        multi_full_name = gene_full_name
+                        multi_uid = uid
+                        #records = uid_record
+            
+                #print(results)
+            
+                if len(results)>1:
+                    multi_uid= "#".join(multi_uid)
+                    multi_gene= "#".join(multi_gene) 
+                    multi_full_name= "#".join(multi_full_name) 
+                    #records= " ".join(records) 
+                
+                #print(count, "/",len(self.snp_ids)," : ", snp_id, multi_uid, multi_gene)
+                self.uids.append(multi_uid)
+                self.gene_names.append(multi_gene)
+                self.gene_full_names.append(multi_full_name)
+                self.saved_snp_id.append(snp_id)
+                #self.records.append(records) 
+        return removal_index, self.records, self.uids, self.gene_names, self.gene_full_names
+        #return records
+    def search_id2summary(self, uids, search_email): 
+        Entrez.email = search_email
+        records=''
+        for uid in uids: 
+            summary='#'
+            handle = Entrez.esummary(db="gene", id=uid)
+            #uid_record = Entrez.read(handle) 
+            uid_record = Entrez.read(handle,validate=False)
+            #records.append(uid_record)
+            handle.close()
+            #print( uid_record["DocumentSummarySet"]['DocumentSummary'])
+            if uid_record["DocumentSummarySet"]['DocumentSummary']==[]:    
+                handle = Entrez.esummary(db="gene", id=uid)
+                uid_record = Entrez.read(handle) 
+                handle.close()
+                uid_summary = uid_record["DocumentSummarySet"]['DocumentSummary'][0]
+            else:
+                uid_summary = uid_record["DocumentSummarySet"]['DocumentSummary'][0]
+            gene_name = uid_summary['Name']
+            gene_full_name = uid_summary['Description']
+            if 'Summary' in uid_summary:
+                summary = uid_summary['Summary']
+                if summary == '':
+                    summary = '.'
+            sentence = uid + '\t' + gene_name + '\t' + gene_full_name + '\t' + summary
+            records += sentence + '\n'
+        return records
+
+    def search_gene2doc(self, query, email):
+        LR=[]; TI=[]; AB=[]; MH=[]; RN=[]; PMID=[]; DCOM=[]
+        rec_handler = self.search_medline(query, email)
+
+        FullText=''; Meta=''
+        for rec_id in rec_handler['IdList']:
+            rec = self.fetch_rec(rec_id, rec_handler)
+            rec_file = StringIO(rec)
+            medline_rec = Medline.read(rec_file)  
+            if medline_rec != []:
+                if 'LR' in medline_rec:
+                    LR.append(medline_rec['LR'])  
+                else:
+                    LR.append('.')
+                if 'TI' in medline_rec:
+                    TI.append(medline_rec['TI']) 
+                else:
+                    TI.append('.')
+                if 'AB' in medline_rec:
+                    AB.append(medline_rec['AB']) 
+                else:
+                    AB.append('.')
+                if 'MH' in medline_rec:
+                    MH.append(medline_rec['MH']) 
+                else:
+                    MH.append('.')
+                if 'PMID' in medline_rec:
+                    PMID.append(medline_rec['PMID']) 
+                else:
+                    PMID.append('.') 
+                if 'DCOM' in medline_rec:
+                    DCOM.append(medline_rec['DCOM']) 
+                else:
+                    DCOM.append('.') 
+                if 'RN' in medline_rec:
+                    RN.append(medline_rec['RN']) 
+                else:
+                    RN.append('.') 
+        for i in range(len(AB)):
+            FullText += '\t#'+PMID[i]+'\t'+DCOM[i]+'\t'+LR[i]+'\t'+TI[i]+'\t'+AB[i] 
+            Meta += "\t@".join(RN[i])+'\t#'.join(MH[i])
+        FullText+='\n' 
+        Meta+='\n' 
+        return AB, FullText, Meta
+     
+    def search_medline(self, query, email):
+        Entrez.email = email
+        search = Entrez.esearch(db='pubmed', term=query, usehistory='y')
+        
+        handle = Entrez.read(search)
+        try:
+            return handle
+        except Exception as e:
+            raise IOError(str(e))
+        finally:
+            search.close() 
+
+    def search_list(self, query, year, email): 
+        self.user_term = query
+        self.email = email
+        self.year=year
+        self.user_db="pubmed"
+        
+        Entrez.email = email
+        if year==None:
+            search_results = Entrez.read(
+                Entrez.esearch(
+                    db=self.user_db, term=self.user_term, datetype="pdat", usehistory="y"
+                    )
+                )
+            self.name = 'full'
+        else:
+            user_reldate = 365*year
+            search_results = Entrez.read(
+                Entrez.esearch(
+                    db=self.user_db, term=self.user_term, reldate=user_reldate, datetype="pdat", usehistory="y"
+                    #db=self.user_db, term=user_term, datetype="pdat", usehistory="y"
+                    )
+                )
+            self.name = str(year)
+
+        count = int(search_results["Count"]) 
+        return search_results, count
+    
+    def search_full(self, ix, data_dir, search_results, starting, count, batch): 
+        batch_size = batch
+        out_handle = open(os.path.join(data_dir, self.user_db+'.'+self.user_term+"."+str(ix)+"."+self.name+".txt"), "w", encoding='utf-8') 
+        for start in range(starting, count, batch_size):
+            end = min(count, start + batch_size) 
+            print("Going to download records from %i to %i" % (start + 1, end))
+            fetch_handle = Entrez.efetch(
+                db="pubmed",
+                rettype="medline",
+                retmode="text",
+                retstart=start,
+                retmax=end - start,  # request only the records remaining in this batch
+                webenv=search_results["WebEnv"],
+                query_key=search_results["QueryKey"],
+                )
+            data = fetch_handle.read()
+            fetch_handle.close()
+            out_handle.write(data)
+            time.sleep(2)  # Delay between each batch fetch to respect the API rate limit
+        out_handle.close()
+        
+    def fetch_rec(self, rec_id, entrez_handle):
+        fetch_handle = Entrez.efetch(db='pubmed', id=rec_id,
+                                 rettype='Medline', retmode='text',
+                                 webenv=entrez_handle['WebEnv'],
+                                 query_key=entrez_handle['QueryKey'])
+        rec = fetch_handle.read()
+        fetch_handle.close()
+        return rec
+
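+# --- Usage sketch (illustrative; e-mail and paths are placeholders) -------
+# pud = ids_pudmed()
+# results, count = pud.search_list('heart', None, 'user@example.com')
+# pud.search_full(0, '/tmp/docs', results, 0, count, 10000)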
diff --git a/code/lib/ML_models.py b/code/lib/ML_models.py
new file mode 100644
index 0000000..9bb602d
--- /dev/null
+++ b/code/lib/ML_models.py
@@ -0,0 +1,540 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Tue Aug 30 22:02:06 2022
+
+@author: Jihye Moon
+"""
+import numpy as np
+import pathlib
+import pandas as pd
+import os
+
+# The models below use the TF1 graph API (tf.placeholder, tf.Session, tf.layers);
+# on TensorFlow 2.x the usual workaround is `import tensorflow.compat.v1 as tf`
+# followed by `tf.disable_v2_behavior()`.
+import tensorflow as tf
+
+from sklearn.linear_model import LogisticRegression 
+from sklearn.svm import LinearSVC
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.ensemble import RandomForestClassifier 
+from sklearn.model_selection import GridSearchCV
+from sklearn.preprocessing import StandardScaler    
+
+from sklearn.feature_selection import SelectKBest
+from sklearn.feature_selection import f_regression
+from sklearn.ensemble import ExtraTreesClassifier
+from skfeature.function.similarity_based import fisher_score
+
+from imblearn.over_sampling import ADASYN, SMOTE
+
+class dimension_reducers():
+    def __init__(self):
+        return None
+    
+    def PCA(self, X_train, X_test, X_valid, dim):
+        from sklearn.decomposition import PCA 
+        scaler = StandardScaler()
+        pca = PCA(n_components=dim)
+        # fit on the training split only, then project all splits with the same components
+        C1 = pca.fit_transform(X_train)
+        C2 = pca.transform(X_test)
+        C3 = pca.transform(X_valid)
+        
+        scaler.fit(C1)
+        C1 = scaler.transform(C1)
+        C2 = scaler.transform(C2)
+        C3 = scaler.transform(C3) 
+        return C1, C2, C3
+    
+    def UMAP(self, X_train, X_test, X_valid, dim): 
+        import umap 
+        scaler = StandardScaler()
+        reducer = umap.UMAP(n_components=dim)
+        # fit on the training split only so all splits share one embedding space
+        reducer.fit(X_train)
+        B1 = reducer.transform(X_train)
+        B2 = reducer.transform(X_test)
+        B3 = reducer.transform(X_valid)
+        
+        scaler.fit(B1)
+        B1 = scaler.transform(B1)
+        B2 = scaler.transform(B2)
+        B3 = scaler.transform(B3) 
+        return B1, B2, B3
+    
+    def Our_DR(self, reduced_emb0, X_train, X_test, X_valid, dim):
+        scaler =StandardScaler()
+        A2=np.matmul(X_test, reduced_emb0) 
+        A1=np.matmul(X_train, reduced_emb0)  
+        A3=np.matmul(X_valid, reduced_emb0) 
+        
+        scaler.fit(A1)
+        A1 = scaler.transform(A1)
+        A2 = scaler.transform(A2)
+        A3 = scaler.transform(A3) 
+        return A1, A2, A3
+    
+class feature_selectors():
+    def __init__(self):
+        return None
+    
+    def dataTump(self, result_dir, word, name):
+        with open(result_dir+'/'+name+'logs.txt', 'a') as f:
+            f.write('{}\t'.format(word))
+            f.write('\n')
+        
+    def H2FS_fit(self, X_train, y_train, feature_size):
+        fnn = round(feature_size*0.5)
+        wg=self.HFS(X_train,y_train, feature_size)
+        hf_score=list(wg.values())
+        hf_idx = np.argsort(hf_score).tolist()
+        hf_idx = hf_idx[::-1][0:fnn]
+        self.hf_idx=hf_idx
+        
+    def H2FS_transform(self, X):
+        new_X = X[:,self.hf_idx]
+        #X_test3 = X_test[:,hf_idx]
+        #X_valid3 = X_valid[:,hf_idx]
+        return new_X
+    
+    def HFS(self, X_train, y_train, feature_size):
+        all_feature=X_train.shape[1]
+        weights = {}
+        for i in range(all_feature):
+            weights[i]=0
+        fis_idx, f1_idx, et_idx = self.HFS_FS(X_train, y_train)
+        
+        cases = [fis_idx, f1_idx, et_idx]
+        fns=[round(feature_size*0.3), round(feature_size*0.4),round(feature_size*0.5)] # 30%, 40%, and 50% of all features. Refer original H2FS paper 
+        count=0
+        for case in cases: 
+            for fn in fns: 
+                selected_features=case[0:fn] 
+                acc1, acc2, acc3, acc4, acc5 = self.HFS_CS(X_train[:,selected_features], y_train)
+                acc=[acc1, acc2, acc3, acc4, acc5] 
+                for sf in selected_features:
+                    weights[sf]=sum(acc)
+            count+=1
+        return weights
+    
+    def HFS_CS(self, X_train, y_train):
+        from sklearn.naive_bayes import GaussianNB
+        from sklearn.neighbors import KNeighborsClassifier
+        from sklearn.naive_bayes import BernoulliNB
+        clf = GaussianNB()
+        clf.fit(X_train, y_train)
+        
+        acc1= clf.score(X_train, y_train)
+        neigh = KNeighborsClassifier()
+        neigh.fit(X_train, y_train)
+        
+        acc2= neigh.score(X_train, y_train)
+        clf = BernoulliNB() 
+        clf.fit(X_train, y_train)
+        
+        acc3= clf.score(X_train, y_train)
+        clf = DecisionTreeClassifier()
+        clf.fit(X_train, y_train)
+        
+        acc4= clf.score(X_train, y_train)
+        clf = RandomForestClassifier()
+        clf.fit(X_train, y_train)
+        acc5= clf.score(X_train, y_train)
+        return acc1, acc2, acc3, acc4, acc5
+    
+    def HFS_FS(self, X_train, y_train, fn=282): 
+        fis_idx = fisher_score.fisher_score(X_train, y_train, mode='rank') #returns rank directly instead of fisher score. so no need for feature_ranking
+        fis_idx=fis_idx[0:fn]
+        
+        f1_clf=SelectKBest(f_regression, k=fn).fit(X_train,y_train)
+        f1_score=f1_clf.scores_ 
+        f1_idx = np.argsort(f1_score).tolist()
+        f1_idx = f1_idx[::-1][0:fn]
+        
+        rnd_clf = ExtraTreesClassifier()
+        rnd_clf.fit(X_train, y_train) 
+        et_score=rnd_clf.feature_importances_ 
+        et_idx = np.argsort(et_score).tolist()
+        et_idx = et_idx[::-1][0:fn]
+        return fis_idx, f1_idx, et_idx
+      
+    def Our_FS(self, emb2simi, name, embedding_list, variables_indexing, disease_variables_indexing, additional_dictionary, embedding, target_embedding_list, index2target, index2variables, target_embedding, feature_size, result_dir): 
+        gene_name = '../gene_name_info/query_full_name'; gene_symb='../gene_name_info/query_symbol' 
+        _, embed_name = emb2simi.target2variable(" ".join(list(disease_variables_indexing.keys())), target_embedding, target_embedding_list, embedding, embedding_list, index2variables, variables_indexing, feature_size)
+        df = pd.DataFrame(embed_name)
+        df.to_csv(os.path.join(result_dir, name+'.csv'), index=False)  
+        print('Selected features by our FS were saved in', result_dir)
+
+        return embed_name
+    def RF(self, ix, X_train, y_train, X_test, y_test, names, result_dir):
+        from sklearn.ensemble import RandomForestClassifier
+        from sklearn.model_selection import GridSearchCV 
+        rnd_grid = [
+            {'n_estimators': [128, 256, 384], 'max_features': [128]}, 
+            ]
+        rnd_clf = RandomForestClassifier()
+        grid_search3 = GridSearchCV(rnd_clf, rnd_grid, cv=None, scoring='accuracy', return_train_score=True)
+        grid_search3.fit(X_train, y_train)
+        best_param=grid_search3.best_params_
+      
+        rnd_clf = RandomForestClassifier(**best_param)
+        rnd_clf.fit(X_train, y_train)
+        values=rnd_clf.feature_importances_ 
+        indices = np.argsort(values).tolist()
+        indices = indices[::-1]
+        for i in range(len(indices)):
+            rlt = str(names[indices[i]])+' '+str(values[indices[i]])+' '+str(indices[i])
+            self.dataTump(result_dir, rlt,ix+' RF') 
+        print('Selected features by RF were saved in', result_dir)
+        return names, values, indices
+
+    def DT(self, ix, X_train, y_train, X_test, y_test, names, result_dir):
+        from sklearn.tree import DecisionTreeClassifier
+        from sklearn.model_selection import GridSearchCV 
+        rnd_grid = [
+            {'max_features': [128], 'max_depth':[3, 5], 'max_leaf_nodes':[3, 5]}, 
+            ] 
+        rnd_clf = DecisionTreeClassifier()
+        grid_search3 = GridSearchCV(rnd_clf, rnd_grid, cv=None, scoring='accuracy', return_train_score=True)
+        grid_search3.fit(X_train, y_train)
+        best_param=grid_search3.best_params_
+    
+        rnd_clf = DecisionTreeClassifier(**best_param)
+        rnd_clf.fit(X_train, y_train)
+        values=rnd_clf.feature_importances_ 
+        indices = np.argsort(values).tolist()
+        indices = indices[::-1]
+        for i in range(len(indices)):
+            rlt = str(names[indices[i]])+' '+str(values[indices[i]])+' '+str(indices[i]) 
+            self.dataTump(result_dir, rlt,ix+' DT')
+        print('Selected features by DT were saved in', result_dir)
+        return names, values, indices
+
+class predictors():
+    def __init__(self):
+        return None
+    
+    def reset_graph(self, seed=42):
+      tf.reset_default_graph()
+      tf.set_random_seed(seed)
+      np.random.seed(seed)
+      
+    def batch(self, X, y, batch_size, name='batch'):  
+        n_size=len(X)
+        rd_idx = np.random.permutation(n_size)  
+        n_batches = n_size // batch_size
+        for idx in np.array_split(rd_idx, n_batches):
+            X_batch, y_batch = X[idx], y[idx]
+            yield X_batch, y_batch
+            
+    def softmax(self, sx, name='softmax'):  
+        sfxmax=[]
+        for i in range(len(sx)):
+            sfxmax.append((np.exp(sx[i])/np.sum(np.exp(sx),axis=1)))
+        return sfxmax 
+
+    def CNN_train(self, X_train, _y_train, X_test, _y_test, X_valid, _y_valid, n_inputs_label=2):   
+        X_train = X_train.reshape(-1, X_train.shape[1], 1) 
+        X_test = X_test.reshape(-1, X_test.shape[1], 1)
+        X_valid = X_valid.reshape(-1, X_valid.shape[1], 1)
+            
+        _y_train = np.squeeze(_y_train)
+        _y_test = np.squeeze(_y_test)  
+        _y_valid = np.squeeze(_y_valid) 
+             
+        n_outputs = 2
+    
+        print("Class: ",n_outputs)
+    
+        learning_rate = 0.001 
+    
+        self.reset_graph()
+    
+        channels = 1
+        n_inputs = n_inputs_label
+        print(n_inputs)
+    
+        conv1_fmaps = 16  
+        conv1_ksize = [3]
+        conv1_stride = [2] 
+    
+        conv_pad = "SAME"  
+        n_fc1 = 64  
+        n_outputs = 2  
+             
+        folder_path="../../results/CNN_model"
+        pathlib.Path(folder_path).mkdir(parents=True, exist_ok=True)  
+    
+        graph = tf.Graph()
+    
+        with graph.as_default():
+            
+            with tf.name_scope("inputs"):
+                input_X = tf.placeholder(tf.float32, shape=[None, n_inputs, channels], name="X") 
+                input_y = tf.placeholder(tf.int32, shape=[None], name="y")
+                keep_prob = tf.placeholder(tf.float32) 
+    
+            with tf.name_scope("conv"):  
+                conv1 = tf.layers.conv1d(input_X, filters=conv1_fmaps, kernel_size=conv1_ksize,
+                             strides=conv1_stride, padding=conv_pad,
+                             activation=tf.nn.elu, name="conv1") 
+                pool1 = tf.layers.max_pooling1d(conv1, pool_size=2, strides=1, padding='SAME')
+                drop_out1 = tf.nn.dropout(pool1, keep_prob)
+                conv2 = tf.layers.conv1d(drop_out1, filters=conv1_fmaps, kernel_size=conv1_ksize,
+                             strides=conv1_stride, padding=conv_pad,
+                             activation=tf.nn.elu, name="conv2")
+                pool2 = tf.layers.max_pooling1d(conv2, pool_size=2, strides=1, padding='SAME')
+     
+            with tf.name_scope("conv2"): 
+                [a,b,c] = pool2.shape
+                pool8_flat = tf.reshape(pool2, shape=[-1, int(b) * int(c)])
+                drop_out9 = tf.nn.dropout(pool8_flat, keep_prob)
+    
+            with tf.name_scope("fc1"):
+                fc1 = tf.layers.dense(drop_out9, n_fc1, activation=tf.nn.relu, name="fc1") 
+    
+            with tf.name_scope("output"):
+                logits = tf.layers.dense(fc1, n_outputs, name="output") 
+                outputs=logits
+                
+            with tf.name_scope("train"): 
+                xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=outputs, labels=input_y)
+                loss = tf.reduce_mean(xentropy) 
+                optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 
+                training_op = optimizer.minimize(loss)
+                tf.summary.scalar('loss', loss)
+                merged = tf.summary.merge_all()
+        
+            with tf.name_scope("eval"):
+                correct = tf.nn.in_top_k(outputs, input_y, 1) 
+                accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
+    
+            with tf.name_scope("init_and_save"):
+                init = tf.global_variables_initializer()
+                saver = tf.train.Saver()
+    
+        n_epochs = 100
+        batch_size = 128
+        
+        saved_acc = []
+        valid = 0; test = 0; valid_Z = 0
+        # predictions from the best validation epoch; filled during training
+        valid_prediction = None; test_prediction = None; prob = None
+        
+        with tf.Session(graph=graph) as sess:
+            writer = tf.summary.FileWriter(folder_path+'/TB', sess.graph)
+            init.run()
+            for epoch in range(n_epochs):        
+                run_metadata = tf.RunMetadata()  
+                for X_batch, y_batch in self.shuffle_batch(X_train, _y_train, batch_size):  
+                    _, summary, loss_val = sess.run([training_op, merged, loss], feed_dict={input_X: X_batch, input_y: y_batch, keep_prob:0.7},run_metadata=run_metadata)
+                    writer.add_summary(summary, epoch)  
+                acc_batch = accuracy.eval(feed_dict={input_X: X_batch, input_y: y_batch, keep_prob:1.0})
+                X_test=X_test.reshape(-1, n_inputs_label, 1)
+                X_valid=X_valid.reshape(-1, n_inputs_label, 1)
+                acc_test = accuracy.eval(feed_dict={input_X: X_test, input_y: _y_test, keep_prob:1.0}) 
+                acc_valid = accuracy.eval(feed_dict={input_X: X_valid, input_y: _y_valid, keep_prob:1.0})  
+                
+                if acc_valid>valid:
+                    valid=acc_valid
+                    test=acc_test 
+                    Z=logits.eval(feed_dict={input_X:X_test, keep_prob:1.0})
+                    prob=Z
+                    y_pred=np.argmax(Z, axis=1) 
+                    test_prediction=y_pred
+                    valid_Z=logits.eval(feed_dict={input_X:X_valid, keep_prob:1.0})
+                    valid_y_pred=np.argmax(valid_Z, axis=1) 
+                    valid_prediction=valid_y_pred
+                saved_acc.append([acc_batch, acc_test])
+            
+            acc_test = accuracy.eval(feed_dict={input_X: X_test, input_y: _y_test, keep_prob:1.0})
+            print("best valid :", valid, " test sets:", test)
+    
+            save_path = saver.save(sess, folder_path+"/CNN_model.ckpt")
+            print("this model is saved to ",save_path) 
+        writer.close()
+        return valid_prediction, test_prediction, prob[:,1]
+    
+    def DNN_train2(self, X_train, y_train, X_test, y_test, X_valid, y_valid, n_inputs_label): 
+        self.reset_graph()
+        n_inputs = n_inputs_label
+        n_layers = 10
+        n_hidden1 = 100
+        n_outputs = 2 
+        
+        learning_rate = 0.001 
+        
+        n_epochs = 1000 
+        batch_size = 128
+        
+        saved_acc=[] 
+        
+        folder_path="DNN_model"
+        pathlib.Path(folder_path).mkdir(parents=True, exist_ok=True)
+    
+        X = tf.placeholder(tf.float32, [None, n_inputs])
+        y = tf.placeholder(tf.int64, [None])
+        keep_prob = tf.placeholder(tf.float32)
+        training = tf.placeholder_with_default(False, shape=(), name='training')
+    
+        with tf.variable_scope("dnn"): 
+            for i in range(n_layers):
+                layer_name="hidden"+str(i)
+                if i==0:
+                    hidden = tf.layers.dense(X, n_hidden1, activation=tf.nn.elu, name=layer_name)
+                else:
+                    hidden = tf.nn.dropout(hidden, keep_prob)
+                    hidden = tf.layers.dense(X, n_hidden1, activation=tf.nn.elu, name=layer_name)
+                
+            logits = tf.layers.dense(hidden, n_outputs, name="outputs")
+        
+        with tf.variable_scope("loss"): 
+            crossentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
+            loss = tf.reduce_mean(crossentropy, name="loss")
+    
+        with tf.name_scope("train"): 
+            optimizer = tf.train.GradientDescentOptimizer(learning_rate)
+            training_op = optimizer.minimize(loss)
+    
+        with tf.name_scope("eval"): 
+            correct = tf.nn.in_top_k(logits, y, 1)
+            accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
+    
+        init = tf.global_variables_initializer() 
+        saver = tf.train.Saver() 
+    
+        epoch_count=0; valid=0; test=0; prob=0
+        with tf.Session() as sess:
+            init.run()
+            for epoch in range(n_epochs):
+                epoch_count=epoch_count+1
+                for X_batch, y_batch in self.batch(X_train, y_train, batch_size):
+                    sess.run(training_op, feed_dict={X: X_batch, y:y_batch, keep_prob:1.0, training:True}) 
+                acc_batch = accuracy.eval(feed_dict={X:X_batch, y:y_batch, keep_prob:1.0}) 
+                acc_test = accuracy.eval(feed_dict={X: X_test, y:y_test, keep_prob:1.0}) 
+                acc_valid = accuracy.eval(feed_dict={X: X_valid, y:y_valid, keep_prob:1.0}) 
+
+                if acc_valid>=valid:
+                    valid=acc_valid
+                    test=acc_test 
+                    Z=logits.eval(feed_dict={X:X_test, keep_prob:1.0})
+                    y_pred=np.argmax(Z, axis=1) 
+                    prob=Z
+                    test_prediction=y_pred
+                    valid_Z=logits.eval(feed_dict={X:X_valid, keep_prob:1.0})
+                    valid_y_pred=np.argmax(valid_Z, axis=1) 
+                    valid_prediction=valid_y_pred
+                saved_acc.append([acc_batch, acc_test])
+            save_path = saver.save(sess, folder_path+"/test.ckpt") 
+            print("DNN model is saved to :", save_path)
+            print("The best valid ", valid," test", test)
+            
+        return valid_prediction, test_prediction, prob[:,1]
+    
+    def multi_models_running(self, _X, _y, X_test, y_test):
+      total_prediction=[]; total_proba=[]
+      print("=================== LinearSVC")
+      grid = [
+            {'C': [0.01, 0.1, 1.0]}, 
+            ] 
+      clf = LinearSVC() 
+      grid_search = GridSearchCV(clf, grid, cv=None, scoring='accuracy', return_train_score=True)
+      grid_search.fit(_X, _y)
+      best_param=grid_search.best_params_
+      clf = LinearSVC(**best_param) 
+      clf.fit(_X, _y)
+      y_preds1=clf.predict(X_test) 
+      total_prediction.append(y_preds1) 
+      y_score2 = clf.decision_function(X_test)
+      total_proba.append(y_score2)
+    
+      print("=================== DT") 
+      grid = [
+        {'max_features': [128], 'max_depth':[3, 5], 'max_leaf_nodes':[3, 5]}, 
+      ]
+      clf = DecisionTreeClassifier() 
+      grid_search = GridSearchCV(clf, grid, cv=None, scoring='accuracy', return_train_score=True)
+      grid_search.fit(_X, _y)
+      best_param=grid_search.best_params_
+      clf = DecisionTreeClassifier(**best_param)   
+      clf.fit(_X, _y)
+      y_preds3=clf.predict(X_test)
+      total_prediction.append(y_preds3)
+     
+      if len(list(set(y_test)))>2:
+          y_score4 = clf.predict_proba(X_test)
+      else:
+          y_score4 = clf.predict_proba(X_test)[:,1] 
+    
+      total_proba.append(y_score4)
+      
+      print("=================== RF")
+      grid = [
+        {'n_estimators': [128, 256, 384], 'max_features': [128]}, 
+      ]
+      clf = RandomForestClassifier() 
+      grid_search = GridSearchCV(clf, grid, cv=None, scoring='accuracy', return_train_score=True)
+      grid_search.fit(_X, _y)
+      best_param=grid_search.best_params_
+      clf = RandomForestClassifier(**best_param)
+      clf.fit(_X, _y)
+      y_preds4=clf.predict(X_test)
+      total_prediction.append(y_preds4)
+    
+      if len(list(set(y_test)))>2:
+          y_score4 = clf.predict_proba(X_test)
+      else:
+          y_score4 = clf.predict_proba(X_test)[:,1] 
+    
+      total_proba.append(y_score4)
+     
+      print("=================== LR")
+      grid = [
+            {'C': [0.01, 0.1, 1.0]}, 
+            ] 
+      clf = LogisticRegression() 
+      grid_search = GridSearchCV(clf, grid, cv=None, scoring='accuracy', return_train_score=True)
+      grid_search.fit(_X, _y)
+      best_param=grid_search.best_params_
+      clf = LogisticRegression(**best_param)
+      clf.fit(_X, _y)
+      y_preds5=clf.predict(X_test) 
+       
+      if len(list(set(y_test)))>2:
+          y_score5 = clf.predict_proba(X_test)
+      else:
+          y_score5 = clf.predict_proba(X_test)[:,1]
+    
+      total_prediction.append(y_preds5)
+      total_proba.append(y_score5)
+      
+      return total_prediction, total_proba
+    
+    def shuffle_batch(self, X, y, batch_size):
+      rnd_idx = np.random.permutation(len(X))
+      n_batches = len(X) // batch_size  
+      for batch_idx in np.array_split(rnd_idx, n_batches):
+        X_batch, y_batch = X[batch_idx], y[batch_idx] 
+        yield X_batch, y_batch
+         
+    def dumpArrayFile(self, denseList, fileName):
+        np.asarray(denseList).dump(fileName + '.dat')
+    
+    def run_save(self, X_train, y_train, X_test, y_test, X_valid, y_valid, name, sampling, dimension, result_dir):
+        if sampling=='SMOTE':
+            oversample = SMOTE()
+        else:
+            oversample = ADASYN()
+            
+        Re_X_train, Re_y_train = oversample.fit_resample(X_train, y_train)
+        _, prediction, prob = self.DNN_train2(Re_X_train, Re_y_train, X_test, y_test, X_valid, y_valid, dimension)
+        total_prediction, total_prob = self.multi_models_running(Re_X_train, Re_y_train, X_test, y_test) 
+        total_prediction.extend([prediction])    
+        total_prob.extend([prob])
+        _, prediction, prob = self.CNN_train(Re_X_train, Re_y_train, X_test, y_test, X_valid, y_valid, dimension)
+        total_prediction.extend([prediction])
+        total_prob.extend([prob])
+        self.dumpArrayFile(total_prediction,result_dir+'/'+name) 
+        self.dumpArrayFile(total_prob,result_dir+'/'+'prob.'+name) 
+        return total_prediction, total_prob
+    
+    def save_label(self, y_test, name, result_dir):
+        self.dumpArrayFile(y_test,result_dir+'/'+name) 
+    
+
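+# --- Usage sketch (illustrative; names and paths are assumptions) ---------
+# pred = predictors()
+# total_prediction, total_prob = pred.run_save(
+#     X_train, y_train, X_test, y_test, X_valid, y_valid,
+#     name='CVD', sampling='SMOTE', dimension=X_train.shape[1],
+#     result_dir='../../results')
+# run_save oversamples the training split, then trains the LinearSVC/DT/RF/LR
+# baselines plus the DNN and CNN, dumping predictions and probabilities.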
diff --git a/code/lib/Medline/__init__.py b/code/lib/Medline/__init__.py
new file mode 100644
index 0000000..572459c
--- /dev/null
+++ b/code/lib/Medline/__init__.py
@@ -0,0 +1,222 @@
+# Copyright 1999 by Jeffrey Chang.  All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+
+"""Code to work with Medline from the NCBI.
+
+Classes:
+ - Record           A dictionary holding Medline data.
+
+Functions:
+ - read             Reads one Medline record
+ - parse            Allows you to iterate over a bunch of Medline records
+
+"""
+
+
+class Record(dict):
+    """A dictionary holding information from a Medline record.
+
+    All data are stored under the mnemonic appearing in the Medline
+    file. These mnemonics have the following interpretations:
+
+    ========= ==============================
+    Mnemonic  Description
+    --------- ------------------------------
+    AB        Abstract
+    CI        Copyright Information
+    AD        Affiliation
+    IRAD      Investigator Affiliation
+    AID       Article Identifier
+    AU        Author
+    FAU       Full Author
+    CN        Corporate Author
+    DCOM      Date Completed
+    DA        Date Created
+    LR        Date Last Revised
+    DEP       Date of Electronic Publication
+    DP        Date of Publication
+    EDAT      Entrez Date
+    GS        Gene Symbol
+    GN        General Note
+    GR        Grant Number
+    IR        Investigator Name
+    FIR       Full Investigator Name
+    IS        ISSN
+    IP        Issue
+    TA        Journal Title Abbreviation
+    JT        Journal Title
+    LA        Language
+    LID       Location Identifier
+    MID       Manuscript Identifier
+    MHDA      MeSH Date
+    MH        MeSH Terms
+    JID       NLM Unique ID
+    RF        Number of References
+    OAB       Other Abstract
+    OCI       Other Copyright Information
+    OID       Other ID
+    OT        Other Term
+    OTO       Other Term Owner
+    OWN       Owner
+    PG        Pagination
+    PS        Personal Name as Subject
+    FPS       Full Personal Name as Subject
+    PL        Place of Publication
+    PHST      Publication History Status
+    PST       Publication Status
+    PT        Publication Type
+    PUBM      Publishing Model
+    PMC       PubMed Central Identifier
+    PMID      PubMed Unique Identifier
+    RN        Registry Number/EC Number
+    NM        Substance Name
+    SI        Secondary Source ID
+    SO        Source
+    SFM       Space Flight Mission
+    STAT      Status
+    SB        Subset
+    TI        Title
+    TT        Transliterated Title
+    VI        Volume
+    CON       Comment on
+    CIN       Comment in
+    EIN       Erratum in
+    EFR       Erratum for
+    CRI       Corrected and Republished in
+    CRF       Corrected and Republished from
+    PRIN      Partial retraction in
+    PROF      Partial retraction of
+    RPI       Republished in
+    RPF       Republished from
+    RIN       Retraction in
+    ROF       Retraction of
+    UIN       Update in
+    UOF       Update of
+    SPIN      Summary for patients in
+    ORI       Original report in
+    ========= ==============================
+
+    """
+
+
+def parse(handle):
+    """Read Medline records one by one from the handle.
+
+    The handle is either is a Medline file, a file-like object, or a list
+    of lines describing one or more Medline records.
+
+    Typical usage::
+
+        >>> from Bio import Medline
+        >>> with open("Medline/pubmed_result2.txt") as handle:
+        ...     records = Medline.parse(handle)
+        ...     for record in records:
+        ...         print(record['TI'])
+        ...
+        A high level interface to SCOP and ASTRAL ...
+        GenomeDiagram: a python package for the visualization of ...
+        Open source clustering software.
+        PDB file parser and structure class implemented in Python.
+
+    """
+    # These keys point to string values
+    textkeys = (
+        "ID",
+        "PMID",
+        "SO",
+        "RF",
+        "NI",
+        "JC",
+        "TA",
+        "IS",
+        "CY",
+        "TT",
+        "CA",
+        "IP",
+        "VI",
+        "DP",
+        "YR",
+        "PG",
+        "LID",
+        "DA",
+        "LR",
+        "OWN",
+        "STAT",
+        "DCOM",
+        "PUBM",
+        "DEP",
+        "PL",
+        "JID",
+        "SB",
+        "PMC",
+        "EDAT",
+        "MHDA",
+        "PST",
+        "AB",
+        "EA",
+        "TI",
+        "JT",
+    )
+    handle = iter(handle)
+
+    key = ""
+    record = Record()
+    for line in handle:
+        line = line.rstrip()
+        if line[:6] == "      ":  # continuation line
+            if key in ["MH", "AD"]:
+                # Multi-line MESH term, want to append to last entry in list
+                record[key][-1] += line[5:]  # including space using line[5:]
+            else:
+                record[key].append(line[6:])
+        elif line:
+            key = line[:4].rstrip()
+            if key not in record:
+                record[key] = []
+            record[key].append(line[6:])
+        elif record:
+            # Join each list of strings into one string.
+            for key in record:
+                if key in textkeys:
+                    record[key] = " ".join(record[key])
+            yield record
+            record = Record()
+    if record:  # catch last one
+        for key in record:
+            if key in textkeys:
+                record[key] = " ".join(record[key])
+        yield record
+
+
+def read(handle):
+    """Read a single Medline record from the handle.
+
+    The handle is either is a Medline file, a file-like object, or a list
+    of lines describing a Medline record.
+
+    Typical usage:
+
+        >>> from Bio import Medline
+        >>> with open("Medline/pubmed_result1.txt") as handle:
+        ...     record = Medline.read(handle)
+        ...     print(record['TI'])
+        ...
+        The Bio* toolkits--a brief overview.
+
+    """
+    # Note: unlike Biopython's Medline.read, which returns the first record,
+    # this variant consumes the handle and returns the last record parsed
+    # (or [] if the handle holds no records).
+    item = []
+    records = parse(handle)
+    try:
+        while True:
+            item = next(records)
+    except StopIteration:
+        pass
+    finally:
+        del records
+    return item
diff --git a/code/lib/Medline/__pycache__/__init__.cpython-311.pyc b/code/lib/Medline/__pycache__/__init__.cpython-311.pyc
new file mode 100644
index 0000000..0dd7c25
Binary files /dev/null and b/code/lib/Medline/__pycache__/__init__.cpython-311.pyc differ
diff --git a/code/lib/Medline/__pycache__/__init__.cpython-312.pyc b/code/lib/Medline/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000..3553acf
Binary files /dev/null and b/code/lib/Medline/__pycache__/__init__.cpython-312.pyc differ
diff --git a/code/lib/Medline/__pycache__/__init__.cpython-37.pyc b/code/lib/Medline/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000..973d37e
Binary files /dev/null and b/code/lib/Medline/__pycache__/__init__.cpython-37.pyc differ
diff --git a/code/lib/Moon_gene2vec.py b/code/lib/Moon_gene2vec.py
new file mode 100644
index 0000000..edca73f
--- /dev/null
+++ b/code/lib/Moon_gene2vec.py
@@ -0,0 +1,369 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Jun 29 18:30:00 2020
+
+@author: Jihye Moon
+
+"""
+import os
+import pathlib
+
+import numpy as np
+
+class Gene2vec():
+    def __init__(self, data=None, dic=None):
+        self.data = data
+        if dic is not None:
+            self.dic = dic
+        else:
+            self.dic = {}
+        self.count = {}
+        self.count['UNK'] = -1
+        self.dictionary = {}
+        
+    def data_processing(self, ):
+        # placeholder; not implemented
+        return 0
+        
+    def data_loading(self, dataload):
+        sent=[]; gene=[]
+        with open(dataload, 'r', encoding='UTF-8') as f:
+            buffer_data = f.readlines()
+            for lines in buffer_data:
+                stripped = lines.rstrip()
+                fields = stripped.split('\t')
+                if len(fields)>1:
+                    buffer = "".join(fields[1]).split()
+                    self.vocab(buffer)
+                    sent.append(buffer)
+                    if fields[0] != '-1':  # skip placeholder gene labels
+                        gene.append(fields[0])
+        return sent, gene
+    
+    def vocab(self, sent):
+        for word in sent:
+            if self.count.get(word, -1) != -1:
+                self.count[word] +=1
+            else:
+                self.count[word] = 1
+
+    def vocab_save(self, name, vocab, path=None):
+        if path is None:
+            vocab_dir = '/tmp/vocab'
+        else:
+            vocab_dir = path + '/vocab'
+        pathlib.Path(vocab_dir).mkdir(parents=True, exist_ok=True)
+        with open(os.path.join(vocab_dir, name+'.vocab.txt'), "w") as handle:
+            for k, v in vocab.items():
+                handle.write(k+'\t'+str(v)+'\n')
+    def vocab_output(self):
+        vocab_dir = '/tmp/vocab'
+        pathlib.Path(vocab_dir).mkdir(parents=True, exist_ok=True)
+        count = {k: v for k, v in sorted(self.count.items(), key=lambda item: item[1])}
+        with open(os.path.join(vocab_dir, 'vocab.txt'), "w") as handle:
+            for k, v in count.items():
+                handle.write(k+'\t'+str(v)+'\n')
+        return count
+    def vocab_import(self):
+        count = {k: v for k, v in sorted(self.count.items(), key=lambda item: item[1])}
+        x = {v: k for k, v in count.items()}  # inverted count -> word map
+        return count, x
+    def selecting_vocab(self, data, appearance, min_size=1):
+        if data is None:
+            dic = self.dictionary
+        else:
+            dic = data
+        new_dic = {}
+        dic_num = 0
+        for word in self.count:
+            # keep words seen more than `appearance` times and longer than min_size
+            if self.count[word] > appearance and len(word) > min_size:
+                new_dic[word] = dic_num
+                dic_num += 1
+        return new_dic
+    
+    def data_combine(self, sent):
+        sentences = []
+        for i in range(len(sent)):
+            sentences.append("".join(sent[i]).split())
+        return sentences
+            
+    def normal_dic(self, ):
+        # placeholder; not implemented
+        return 0
+    
+    def gene_dic(self, gene, dictionary):
+        dic = dictionary.copy()
+        gene_list = list(set(gene))
+        for gn in gene_list:
+            if dic.get(gn, -1) == -1:
+                dic[gn] = len(dic)  # assign the next free index
+
+        reversed_dic = dict(zip(dic.values(), dic.keys()))
+        self.dic = dic
+        self.gene_reverse_dict = reversed_dic
+        return dic, reversed_dic
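+    # Example: starting from {'the': 0, 'cell': 1}, adding genes ['tp53'] gives
+    # dic = {'the': 0, 'cell': 1, 'tp53': 2} and reversed_dic = {0: 'the', ...}.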
+    
+    def model_training(self, ):
+        # placeholder; not implemented
+        return 0
+    
+    def sent2idx(self, sent):
+        sents=[]
+        for j in range(len(sent)):
+            if self.dic.get(sent[j],-1) != -1:
+                sents.append(self.dic[sent[j]])
+        return sents
+
+    def idx2sent(self, sent):
+        sents=[]
+        for j in range(len(sent)):
+            if self.gene_reverse_dict.get(sent[j],-1) != -1:
+                sents.append(self.gene_reverse_dict[sent[j]])
+        return sents
+ 
+    def gene2associated_skip_gram(self, sents, gene, sents_count, window_size): 
+        saveD=[]
+        preprocessing=self.sent2idx(sents)
+        gene_index=gene
+        prelen=len(preprocessing)
+        x=[]    
+        for k in range(prelen):
+            if self.gene_reverse_dict.get(preprocessing[k],-1)!=-1:
+                x.extend([preprocessing[k]])
+        # symmetric context window of `window_size`, clamped at both ends
+        for i in range(len(x)):
+            index = i
+            if i < window_size:
+                value = x[0:index+window_size+1]
+            elif i >= len(x) - window_size:
+                value = x[index-window_size:]
+            else:
+                value = x[index-window_size:index+window_size+1]
+            saveD.append(value)
+        ix=0
+        data_index=0
+        sz = sum([len(saveD[j])-1 for j in range(len(saveD))])
+        batch = np.zeros(shape=(sz), dtype=np.int32)
+        labels = np.zeros(shape=(sz), dtype=np.int32)
+        span = 2 * window_size + 1  
+        size_counting=0
+        size=[]
+        total_size=[]
+        if data_index + span > len(x):
+            data_index = 0
+        for i in range(len(saveD)):
+            buffer = saveD[i] 
+            data_index += span
+            context_words = [w for w in buffer]
+            for j in range(len(context_words)-1):
+                batch[ix+j]=gene_index #x[i] 
+                labels[ix + j] = context_words[j]
+                size_counting+=1
+            for k in range(size_counting):
+                size.append(size_counting)
+            total_size.extend(size)
+            size=[]
+            size_counting=0
+            ix=ix+len(buffer)-1 
+        return batch, labels, total_size 
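+    # Illustrative sketch: with window_size=1 and indexed words x=[w0,w1,w2],
+    # the clamped windows are [w0,w1], [w0,w1,w2], [w1,w2]; each context word
+    # yields one (gene_index, word) pair, so `batch` repeats the gene index and
+    # `labels` lists the document words the gene embedding learns to predict.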
+    def gene2doc_batch_fucntion(self, sents, gene, sents_count, window_size): 
+        saveD=[]
+        preprocessing=self.sent2idx(sents)
+        gene_index=gene
+        prelen=len(preprocessing)
+        x=[]    
+        for k in range(prelen):
+            if self.gene_reverse_dict.get(preprocessing[k],-1)!=-1:
+                x.extend([preprocessing[k]])
+        # symmetric context window of `window_size`, clamped at both ends
+        for i in range(len(x)):
+            index = i
+            if i < window_size:
+                value = x[0:index+window_size+1]
+            elif i >= len(x) - window_size:
+                value = x[index-window_size:]
+            else:
+                value = x[index-window_size:index+window_size+1]
+            saveD.append(value)
+        ix=0
+        data_index=0
+        sz = sum([len(saveD[j])-1 for j in range(len(saveD))])
+        batch = np.zeros(shape=(sz), dtype=np.int32)
+        labels = np.zeros(shape=(sz), dtype=np.int32)
+        span = 2 * window_size + 1  
+        if data_index + span > len(x):
+            data_index = 0
+        for i in range(len(saveD)):
+            buffer = saveD[i] 
+            data_index += span
+            context_words = [w for w in buffer if w != x[i]] 
+            words_to_use = context_words
+            for j, context_word in enumerate(words_to_use):
+                batch[ix+j]=x[i] 
+                labels[ix + j] = context_word
+            ix=ix+len(buffer)-1 
+        return batch, labels 
+     
+    def gene_associated(self, sents, gene, sents_count, window_size): 
+        saveD=[]
+        preprocessing=self.sent2idx(sents)
+        gene_index=gene
+        prelen=len(preprocessing)
+        x=[]    
+        for k in range(prelen):
+            if self.gene_reverse_dict.get(preprocessing[k],-1)!=-1:
+                x.extend([preprocessing[k]])
+        # symmetric context window of `window_size`, clamped at both ends
+        for i in range(len(x)):
+            index = i
+            if i < window_size:
+                value = x[0:index+window_size+1]
+            elif i >= len(x) - window_size:
+                value = x[index-window_size:]
+            else:
+                value = x[index-window_size:index+window_size+1]
+            saveD.append(value)
+        ix=0
+        data_index=0
+        sz = sum([len(saveD[j])-1 for j in range(len(saveD))])
+        batch = np.zeros(shape=(sz), dtype=np.int32)
+        labels = np.zeros(shape=(sz), dtype=np.int32)
+        span = 2 * window_size + 1  
+        if data_index + span > len(x):
+            data_index = 0
+        for i in range(len(saveD)):
+            buffer = saveD[i] 
+            data_index += span
+            context_words = [w for w in buffer if w != x[i]] 
+            words_to_use = context_words
+            for j, context_word in enumerate(words_to_use):
+                batch[ix+j]=x[i] 
+                labels[ix + j] = context_word
+            ix=ix+len(buffer)-1 
+        return batch, labels 
+    def gene_associated_old2(self, sents, gene_dict2, gene_name, additing_quuery, sents_count, window_size):
+        saveD=[]
+        preprocessing=self.sent2idx(sents)
+        gene_index=gene=gene_dict2[gene_name]
+        associated = additing_quuery[gene_name].split()
+        associated_index = []
+        for i in range(len(associated)):
+            associated_index.append(gene_dict2[associated[i]])
+        
+        prelen=len(preprocessing)
+        x=[]    
+        for k in range(prelen):
+            if self.gene_reverse_dict.get(preprocessing[k],-1)!=-1:
+                x.extend([preprocessing[k]])
+        # symmetric context window of `window_size`, clamped at both ends
+        # (saveD is kept for parity with the other batch builders; the batch
+        # below is filled directly from `preprocessing`)
+        for i in range(len(x)):
+            index = i
+            if i < window_size:
+                value = x[0:index+window_size+1]
+            elif i >= len(x) - window_size:
+                value = x[index-window_size:]
+            else:
+                value = x[index-window_size:index+window_size+1]
+            saveD.append(value)
+        ix=0
+        data_index=0
+        batch = np.zeros(shape=(prelen, len(associated_index)), dtype=np.int32)
+        labels = np.zeros(shape=(prelen), dtype=np.int32)
+        span = 2 * window_size + 1  
+        if data_index + span > len(x):
+            data_index = 0
+        for j, context_word in enumerate(preprocessing):
+            batch[j] = associated_index
+            labels[j] = context_word
+        return batch, labels
+    def gene_insert(self, batch, label, gene):
+        buffer=0; saved_insert=[]; saved_value=[]
+        for i in range(len(batch)):
+            if i>0:        
+                if buffer!=batch[i]:
+                    saved_insert.append(i)
+                    saved_value.append(buffer)
+            buffer=batch[i]
+    
+        for i in range(len(saved_value)):
+            batch=np.insert(batch, saved_insert[i]+i, saved_value[i]) 
+        
+        batch=np.insert(batch, len(batch), buffer) 
+        label=np.insert(label, saved_insert, gene) 
+        label=np.insert(label, len(label), gene) 
+        return batch, label
+    def gene_additing(self, sents, gene, sents_count, window_size):
+        saveD=[]
+        preprocessing=self.sent2idx(sents)
+        gene_index=gene
+        prelen=len(preprocessing)
+        x=[]    
+        for k in range(prelen):
+            if self.gene_reverse_dict.get(preprocessing[k],-1)!=-1:
+                x.extend([preprocessing[k]])
+        # symmetric context window of `window_size`, clamped at both ends
+        # (saveD is kept for parity with the other batch builders; the batch
+        # below is filled directly from `preprocessing`)
+        for i in range(len(x)):
+            index = i
+            if i < window_size:
+                value = x[0:index+window_size+1]
+            elif i >= len(x) - window_size:
+                value = x[index-window_size:]
+            else:
+                value = x[index-window_size:index+window_size+1]
+            saveD.append(value)
+        ix=0
+        data_index=0
+        batch = np.zeros(shape=(prelen), dtype=np.int32)
+        labels = np.zeros(shape=(prelen), dtype=np.int32)
+        span = 2 * window_size + 1  
+        if data_index + span > len(x):
+            data_index = 0
+        for j, context_word in enumerate(preprocessing):
+            batch[j] = gene
+            labels[j] = context_word
+        return batch, labels
diff --git a/code/lib/__pycache__/Building_Literature_Embedding_Model.cpython-37.pyc b/code/lib/__pycache__/Building_Literature_Embedding_Model.cpython-37.pyc
new file mode 100644
index 0000000..dcc7573
Binary files /dev/null and b/code/lib/__pycache__/Building_Literature_Embedding_Model.cpython-37.pyc differ
diff --git a/code/lib/__pycache__/CVD_risk_factor_search.cpython-37.pyc b/code/lib/__pycache__/CVD_risk_factor_search.cpython-37.pyc
new file mode 100644
index 0000000..12186a3
Binary files /dev/null and b/code/lib/__pycache__/CVD_risk_factor_search.cpython-37.pyc differ
diff --git a/code/lib/__pycache__/Intrisic_Evaluation.cpython-37.pyc b/code/lib/__pycache__/Intrisic_Evaluation.cpython-37.pyc
new file mode 100644
index 0000000..5548fa1
Binary files /dev/null and b/code/lib/__pycache__/Intrisic_Evaluation.cpython-37.pyc differ
diff --git a/code/lib/__pycache__/Literature_Data_Collection.cpython-311.pyc b/code/lib/__pycache__/Literature_Data_Collection.cpython-311.pyc
new file mode 100644
index 0000000..ebba28f
Binary files /dev/null and b/code/lib/__pycache__/Literature_Data_Collection.cpython-311.pyc differ
diff --git a/code/lib/__pycache__/Literature_Data_Collection.cpython-312.pyc b/code/lib/__pycache__/Literature_Data_Collection.cpython-312.pyc
new file mode 100644
index 0000000..d005255
Binary files /dev/null and b/code/lib/__pycache__/Literature_Data_Collection.cpython-312.pyc differ
diff --git a/code/lib/__pycache__/Literature_Data_Collection.cpython-37.pyc b/code/lib/__pycache__/Literature_Data_Collection.cpython-37.pyc
new file mode 100644
index 0000000..d578bae
Binary files /dev/null and b/code/lib/__pycache__/Literature_Data_Collection.cpython-37.pyc differ
diff --git a/code/lib/__pycache__/Literature_Data_Preprocessing.cpython-312.pyc b/code/lib/__pycache__/Literature_Data_Preprocessing.cpython-312.pyc
new file mode 100644
index 0000000..c80249a
Binary files /dev/null and b/code/lib/__pycache__/Literature_Data_Preprocessing.cpython-312.pyc differ
diff --git a/code/lib/__pycache__/Literature_Data_Preprocessing.cpython-37.pyc b/code/lib/__pycache__/Literature_Data_Preprocessing.cpython-37.pyc
new file mode 100644
index 0000000..e7ceaea
Binary files /dev/null and b/code/lib/__pycache__/Literature_Data_Preprocessing.cpython-37.pyc differ
diff --git a/code/lib/__pycache__/Loading_PudMed.cpython-311.pyc b/code/lib/__pycache__/Loading_PudMed.cpython-311.pyc
new file mode 100644
index 0000000..2d733ca
Binary files /dev/null and b/code/lib/__pycache__/Loading_PudMed.cpython-311.pyc differ
diff --git a/code/lib/__pycache__/Loading_PudMed.cpython-312.pyc b/code/lib/__pycache__/Loading_PudMed.cpython-312.pyc
new file mode 100644
index 0000000..3b459af
Binary files /dev/null and b/code/lib/__pycache__/Loading_PudMed.cpython-312.pyc differ
diff --git a/code/lib/__pycache__/Loading_PudMed.cpython-37.pyc b/code/lib/__pycache__/Loading_PudMed.cpython-37.pyc
new file mode 100644
index 0000000..54ecf25
Binary files /dev/null and b/code/lib/__pycache__/Loading_PudMed.cpython-37.pyc differ
diff --git a/code/lib/__pycache__/ML_models.cpython-37.pyc b/code/lib/__pycache__/ML_models.cpython-37.pyc
new file mode 100644
index 0000000..3b60654
Binary files /dev/null and b/code/lib/__pycache__/ML_models.cpython-37.pyc differ
diff --git a/code/lib/__pycache__/Moon_gene2vec.cpython-37.pyc b/code/lib/__pycache__/Moon_gene2vec.cpython-37.pyc
new file mode 100644
index 0000000..3917397
Binary files /dev/null and b/code/lib/__pycache__/Moon_gene2vec.cpython-37.pyc differ
diff --git a/code/lib/__pycache__/loading_literature_embedding.cpython-37.pyc b/code/lib/__pycache__/loading_literature_embedding.cpython-37.pyc
new file mode 100644
index 0000000..3dd8738
Binary files /dev/null and b/code/lib/__pycache__/loading_literature_embedding.cpython-37.pyc differ
diff --git a/code/lib/__pycache__/step4_CVD_risk_factor_search.cpython-37.pyc b/code/lib/__pycache__/step4_CVD_risk_factor_search.cpython-37.pyc
new file mode 100644
index 0000000..81e7b03
Binary files /dev/null and b/code/lib/__pycache__/step4_CVD_risk_factor_search.cpython-37.pyc differ
diff --git a/code/lib/loading_literature_embedding.py b/code/lib/loading_literature_embedding.py
new file mode 100644
index 0000000..e7c36b0
--- /dev/null
+++ b/code/lib/loading_literature_embedding.py
@@ -0,0 +1,228 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Apr  1 16:48:48 2019
+
+@author: Jihye Moon
+""" 
+import numpy as np
+import os 
+import tensorflow as tf  
+ 
+class embedding_vector(): 
+    def text_open(self,path):
+        with open(path, 'r') as f:
+            data=f.read().strip().split('\n')
+        return data
+    
+    def data_split(self, key):
+        return key.split('#')
+    
+    def setting(self,path, gene_symb):
+        sess = tf.Session()
+        word2index = {} 
+        index2word=np.load(os.path.join(path, "name.dat"), allow_pickle=True)
+        saver = tf.train.import_meta_graph(os.path.join(path, "model.ckpt.meta"))
+        saver.restore(sess, (os.path.join(path, "model.ckpt")))
+        out_matrix=sess.run('nce_w:0') 
+        in_matrix=sess.run('embed1:0') 
+         
+        index2word=index2word.tolist()
+        
+        words_list = dict(zip(index2word.values(), index2word.keys()))
+        for i in range(len(index2word)):
+            word2index[index2word[i]] = i 
+            
+        self.index2word= dict(zip(word2index.values(), word2index.keys()))
+        self.word2index=word2index
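+        # L2-normalize each embedding row so dot products become cosine similarities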
+        self.syn0norm = np.array( [v/n for v, n in zip(in_matrix, np.linalg.norm(in_matrix, ord=2, axis=1))] )
+        self.syn1norm = np.array( [v/n for v, n in zip(out_matrix, np.linalg.norm(out_matrix, ord=2, axis=1))] )
+        
+        query_symbol=self.text_open(gene_symb+'.txt') #'../gene_name_info/query_symbol.txt'
+        
+        self.symble2name = {}
+        for i in range(len(query_symbol)):
+            self.symble2name[query_symbol[i]]=i
+
+        return words_list, index2word, self.syn0norm, self.syn1norm
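+
+    # Usage sketch (paths as in step4_CVD_risk_factor_identification.py):
+    #   ev = embedding_vector()
+    #   words_list, index2word, syn0norm, syn1norm = \
+    #       ev.setting('../data/old_model', '../data/gene_name_info/query_symbol')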
+    
+    def filtering(self, word, scores):
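+        # drop plural duplicates: if both a word and its 's'/'es' form survive
+        # the search (e.g. 'gene' and 'genes'), keep only the singular form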
+        word_x, word_y = word, word
+        unique_set=[]
+        non_single=[]
+        for x in word_x:
+            for y in word_y:
+                if x +'s' == y:
+                    non_single.append(y)
+                elif x +'es' == y:
+                    non_single.append(y) 
+        unique_set=list(set(non_single)) 
+        re_word=[]; re_score=[]
+        for i in range(len(word)):
+            if word[i] in unique_set:
+                continue
+            else:
+                re_word.append(word[i])
+                re_score.append(scores[i])
+        return re_word, re_score
+        
+    def compute_cosine_similarity(self,x,y):
+        return (np.dot(x,y)/(np.linalg.norm(x,2)*np.linalg.norm(y,2)))
+
+    def get_simwords(self, vec, matrix, TOPNUM):
+        sim_list = np.dot(matrix, vec.T)
+        word_sim_list = [ (s,w) for s, w in zip(sim_list, self.index2word)]
+        word_sim_list.sort(reverse=True)
+        return [ (v[1],v[0]) for v in word_sim_list[:TOPNUM]]
+
+    def get_simgenes(self, vec, matrix, TOPNUM):
+        symble2name=self.symble2name
+        sim_list = np.dot(matrix, vec.T)
+        word_sim_list = [ (s,w) for s, w in zip(sim_list, self.index2word)]
+        word_sim_list.sort(reverse=True)
+        count = 0
+        results = []
+        for v in word_sim_list:
+            # keep only hits whose token is a known gene symbol
+            if symble2name.get(self.index2word[v[1]].replace('#',''),-1) != -1:
+                results.append((v[1],v[0]))
+                count += 1
+                if count == TOPNUM:
+                    break
+        return results
+
+    def print_sim_result(self, result, query, output):
+        scores=[]; word=[]
+        for w, s in result:
+            word.append(self.index2word[w])
+            scores.append(s)
+
+        w, s = self.filtering(word, scores)
+        word=[]
+        for i in range(len(w)):
+            if w[i] not in query:
+                print("\t", w[i], s[i])
+                word.append(str(w[i])+' '+str(s[i]))
+        self.logs(output+' '.join(query), word)  # write the log once, after filtering
+        return None
+    
+    def type_similarity_display(self, output, TOPNUM):
+        kw = ''
+        while kw != '0':
+            kw = input("query word (exit: 0): ")
+            keywords = kw.split(" ")
+
+            index_keywords = [self.word2index.get(k, 0) for k in keywords]
+            buffer_index_keywords = index_keywords.copy()
+            index_keywords = []
+            print("==== Available Words (in-vocabulary):")
+            for ix in buffer_index_keywords:
+                if ix != 0:
+                    index_keywords.append(ix)
+                    print(self.index2word.get(ix, 0))
+            if index_keywords == []:
+                print("There are no available words. Try different queries!")
+            else:
+                vec_keyword = np.mean([self.syn0norm[ki] for ki in index_keywords], axis=0)
+
+                print("=== Intrinsic Evaluation: Words ")
+                result_inin = self.get_simwords(vec_keyword, self.syn0norm, TOPNUM)
+                _ = self.print_sim_result(result_inin, keywords, output)
+
+                print("========")
+                print("=== Intrinsic Evaluation: Gene Names")
+                result_inin = self.get_simgenes(vec_keyword, self.syn0norm, TOPNUM)
+                _ = self.print_sim_result(result_inin, keywords, output)
+
+    def similarity_display(self, kw, output, TOPNUM):
+        if kw!='0':  
+            keyword=kw 
+            keywords = keyword.split(" ") 
+            
+            index_keywords = [self.word2index.get(k,0) for k in keywords]
+ 
+            buffer_index_keywords=index_keywords.copy()
+            index_keywords=[]
+            print("==== Available Words (In-of-vocabulary):")
+            for ix in buffer_index_keywords:
+                if ix!=0:
+                    index_keywords.append(ix)
+                    print(self.index2word.get(ix,0))
+            if index_keywords ==[]:
+                print("There are no available words. Try different queries! ")
+            elif index_keywords !=[]:
+                vec_keyword = np.mean([self.syn0norm[ki] for ki in index_keywords], axis=0)
+                    
+                print ("=== Intrinsic Evaludation: Words ")
+                result_inin = self.get_simwords(vec_keyword, self.syn0norm, TOPNUM)
+                _ = self.print_sim_result(result_inin, keywords, output+'/word_')
+                    
+                print ("========")
+                print ("=== Intrinsic Evaludation: Gene Names")
+                result_inin = self.get_simgenes(vec_keyword, self.syn0norm, TOPNUM)
+                _ = self.print_sim_result(result_inin, keywords, output+'/gene_')
+ 
+    def variable2embed(self, words_list, syn0norm, variables_index, additional_dictionary):
+        variables_lists = list(variables_index.keys())
+        buffer_embedding = []
+        embedding=[]
+        removal = []
+        embedding_list = {}
+        index2variables = {}
+        removed_words=[]
+        for i in range(len(variables_lists)):
+            buffer_embedding=[]
+            words = variables_index[variables_lists[i]]
+            words = words.split()
+            for w in words:
+                if words_list.get(w, -2)!=-2:
+                    buffer_embedding.append(syn0norm[words_list[w]]) 
+                else:
+                    removed_words.append(w)
+                if additional_dictionary.get(w, -2)!=-2:
+                    buffer_embedding.append(syn0norm[words_list[additional_dictionary[w]]]) 
+            if buffer_embedding==[]:
+                removal.append(variables_lists[i])
+            else:
+                embedding.append(np.mean(buffer_embedding, axis=0))
+                embedding_list[variables_lists[i]] = i
+                index2variables[i] = variables_lists[i]
+        self.index2variables=index2variables
+        return embedding_list, index2variables, embedding, removal, removed_words
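+    # Usage sketch (hedged; variable names are hypothetical): map clinical
+    # variables to vectors by averaging the embeddings of their name words:
+    #   variables = {'AGE9': 'age', 'HDL9': 'high density lipoprotein'}
+    #   emb_list, idx2var, emb, removal, removed = \
+    #       model.variable2embed(words_list, syn0norm, variables, {})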
+     
+    def get_simvariables(self, vec, matrix, index2variables, TOPNUM):
+        sim_list = np.dot(matrix, vec.T)
+        word_sim_list = [ (s,w) for s, w in zip(sim_list, index2variables)]
+        word_sim_list.sort(reverse=True)
+        return [ (v[1],v[0]) for v in word_sim_list[:TOPNUM]]
+
+    def logs(self, path, word):
+        with open(path+'_logs.txt', 'w') as f:
+            for w in word:
+                f.write('{}\n'.format(w))
+        
+    def target2variable(self, words, key_embedding, wordlist, embedding, embedding_list, index2variables,variables_indexing, TOPNUM): # variables to variables
+        buffer = words.split(' ') 
+        if len(buffer)==1:
+            vec_keyword = key_embedding[wordlist[words]]
+        else:
+            vec_keyword = []
+            for i in range(len(buffer)):
+                if wordlist.get(buffer[i], -1)!=-1:
+                    vec_keyword.append(key_embedding[wordlist[buffer[i]]])
+            vec_keyword = np.array(vec_keyword)
+            vec_keyword = np.mean(vec_keyword,axis=0)
+        result_inin = self.get_simvariables(vec_keyword, embedding, index2variables, TOPNUM)  
+        data = ''; name = []
+        for w, s in result_inin: 
+            data=data+index2variables[w]+' '
+            name.append(index2variables[w]) 
+        return data, name  
+    
\ No newline at end of file
diff --git a/code/lib/performance_metrics.py b/code/lib/performance_metrics.py
new file mode 100644
index 0000000..0319e5c
--- /dev/null
+++ b/code/lib/performance_metrics.py
@@ -0,0 +1,60 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Dec 10 14:17:51 2021
+
+@author: Jihye Moon
+"""
+import pandas as pd
+import numpy as np
+import os
+import scipy.stats as st
+
+from imblearn.metrics import sensitivity_score, specificity_score
+from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score 
+
+class performance_metrics(): 
+    def saving_5folded_results(self, data, label):
+        total=[]
+        for i in range(len(data)):
+            total.append(self.metric(data[i],label))
+        return pd.DataFrame(total)
+    
+    def metric(self, y_pred,y_true): 
+        A = accuracy_score(y_true,y_pred)
+        R2 = recall_score(y_true,y_pred, average='macro')
+        F2 = f1_score(y_true,y_pred, average='macro')
+        P2 = precision_score(y_true,y_pred, average='macro') 
+    
+        SS = sensitivity_score(y_true,y_pred)
+        SP = specificity_score(y_true,y_pred)
+    
+        return [A, P2, R2, F2, SS, SP] 
+    
+    def averaged_results(self, N, path, data_name):
+        data={}
+        for i in range(N):
+            label = np.load(os.path.join(path, 'CVD_label.dat'), allow_pickle=True)
+            data[i] = self.saving_5folded_results(np.load(os.path.join(path, data_name+'.dat'), allow_pickle=True),label)     
+        name=['accuracy', 'macro-precision', 'macro-recall', 'macro-f1', 'sensitivity', 'specificity']
+        for k in range(len(name)):
+            print("=============== ", name[k] , "=================== ")
+            print(self.ci2(data_name, data,k)) 
+    
+    def direct_averaged_results(self, N, data_name, labels, total_result):
+        data={}
+        for i in range(N):
+            label = labels[i]
+            data[i] = self.saving_5folded_results(total_result[i],label)     
+        name=['accuracy', 'macro-precision', 'macro-recall', 'macro-f1', 'sensitivity', 'specificity']
+        for k in range(len(name)):
+            print("=============== ", name[k] , "=================== ")
+            print(self.ci2(data_name, data,k))
+            
+    def ci2(self, name, _data, k): # 95% CI with averaged results
+        print('=== ', name, ' ===')
+        data = pd.concat([_data[0][k], _data[1][k], _data[2][k], _data[3][k], _data[4][k]])
+        all_size = max(data.index.tolist()) + 1
+        for az in range(all_size):
+            # 95% CI: mean +/- t(0.975, n-1) * standard error of the mean
+            mean = np.mean(data.loc[az])
+            low, high = st.t.interval(alpha=0.95, df=len(data.loc[az])-1,
+                                      loc=mean, scale=st.sem(data.loc[az]))
+            print(" ", str(round(mean, 2)) + ' (' + str(round(low, 2)) + ', ' + str(round(high, 2)) + ')')
+        return '=================== '
diff --git a/code/read_me_images/model1_re.jpg b/code/read_me_images/model1_re.jpg
new file mode 100644
index 0000000..15f5470
Binary files /dev/null and b/code/read_me_images/model1_re.jpg differ
diff --git a/code/read_me_images/model2_re.jpg b/code/read_me_images/model2_re.jpg
new file mode 100644
index 0000000..8fe1a1d
Binary files /dev/null and b/code/read_me_images/model2_re.jpg differ
diff --git a/code/read_me_images/model3.png b/code/read_me_images/model3.png
new file mode 100644
index 0000000..12c50b9
Binary files /dev/null and b/code/read_me_images/model3.png differ
diff --git a/code/read_me_images/model3_re.jpg b/code/read_me_images/model3_re.jpg
new file mode 100644
index 0000000..bd68752
Binary files /dev/null and b/code/read_me_images/model3_re.jpg differ
diff --git a/code/read_me_images/preprocessing.png b/code/read_me_images/preprocessing.png
new file mode 100644
index 0000000..72d0d5d
Binary files /dev/null and b/code/read_me_images/preprocessing.png differ
diff --git a/code/read_me_images/table_collection.png b/code/read_me_images/table_collection.png
new file mode 100644
index 0000000..4d79459
Binary files /dev/null and b/code/read_me_images/table_collection.png differ
diff --git a/code/read_me_images/table_data.png b/code/read_me_images/table_data.png
new file mode 100644
index 0000000..4c2c79c
Binary files /dev/null and b/code/read_me_images/table_data.png differ
diff --git a/code/read_me_images/table_pre.png b/code/read_me_images/table_pre.png
new file mode 100644
index 0000000..88cf2d2
Binary files /dev/null and b/code/read_me_images/table_pre.png differ
diff --git a/code/run b/code/run
new file mode 100644
index 0000000..8a05a49
--- /dev/null
+++ b/code/run
@@ -0,0 +1,50 @@
+#!/usr/bin/env bash
+
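+# Usage: bash run [demo_a|demo_b|demo_r]; with no argument, demo_a runs.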
+if [ "$1" == 'demo_a' ]; then
+  echo ' -- Running all demos'
+  echo ' -- Demo a -- '
+  EMBEDDING_PATH='../data/old_model'
+  STEP4_OUTPUT_PATH='../results/demo_a_CVD_searches' 
+  python -u step4_CVD_risk_factor_identification.py $EMBEDDING_PATH $STEP4_OUTPUT_PATH
+  
+elif [ "$1" == 'demo_b' ]; then
+  echo ' -- Demo b -- ' 
+  QUERY_WORD='heart'
+  NUM_WORD_BASED_DATA=500000
+  NUM_GENE_BASED_DATA=100
+  BASE_PATH='../results/'
+  DATA_COLLECTION_PATH='../results/demo_b'
+  PREPROCESSING_PATH='../results/demo_b'
+  EMBEDDING_PATH='../results/demo_b_model'
+  EPOCH=2 # number of epochs for the literature embedding model
+  STEP4_OUTPUT_PATH='../results/demo_b_CVD_searches'
+
+  python -u step1_data_collection.py $QUERY_WORD $NUM_WORD_BASED_DATA $NUM_GENE_BASED_DATA $DATA_COLLECTION_PATH
+  python -u step2_data_preprocessing.py $DATA_COLLECTION_PATH $PREPROCESSING_PATH
+  python -u step3_literature_embedding_training.py $PREPROCESSING_PATH $EPOCH $EMBEDDING_PATH
+  python -u step4_CVD_risk_factor_identification.py $EMBEDDING_PATH $STEP4_OUTPUT_PATH
+
+elif [ "$1" == 'demo_r' ]; then
+  echo ' -- Running reproduction demo' 
+  echo ' -- 1) CVD risk factor search using pre-trained model '
+  EMBEDDING_PATH='../data/old_model'
+  STEP4_OUTPUT_PATH='../results/demo_original_CVD_searches' 
+  python -u step4_CVD_risk_factor_identification.py $EMBEDDING_PATH $STEP4_OUTPUT_PATH
+  
+  echo '-- 2) Literature model training and evaluation using collected literature data '
+  PREPROCESSING_PATH='../data/old_preprocessed_data'
+  EPOCH=1 # number of epochs for the literature embedding model; the original paper used EPOCH=10
+  EMBEDDING_PATH='../results/demo_new_model'
+  STEP4_OUTPUT_PATH='../results/demo_new_CVD_searches'
+  python -u step3_literature_embedding_training.py $PREPROCESSING_PATH $EPOCH $EMBEDDING_PATH
+  python -u step4_CVD_risk_factor_identification.py $EMBEDDING_PATH $STEP4_OUTPUT_PATH
+
+else 
+  echo ' -- Default'
+  echo ' -- Demo a -- '
+  EMBEDDING_PATH='../data/old_model'
+  STEP4_OUTPUT_PATH='../results/demo_a_CVD_searches' 
+  python -u step4_CVD_risk_factor_identification.py $EMBEDDING_PATH $STEP4_OUTPUT_PATH
+fi
+
diff --git a/code/step1_data_collection.py b/code/step1_data_collection.py
new file mode 100644
index 0000000..16c75e8
--- /dev/null
+++ b/code/step1_data_collection.py
@@ -0,0 +1,64 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Sun Jun 21 00:16:25 2020
+
+Example usage:
+python -u "/mnt/c/Users/lrm22005/OneDrive - University of Connecticut/Research/ZIP11_Bioinformatic/capsule-3642152/code/step1_data_collection.py" 'zinc' 0 0 './results/zinc'
+"""
+
+import os
+import pathlib 
+import sys
+
+sys.path.append('lib')  
+from lib.Literature_Data_Collection import literature_data_collection
+
+years = 15
+
+if len(sys.argv)>3:
+    word_query = str(sys.argv[1])
+    word_end_point = int(sys.argv[2]) # the endpoint of a word-based data collection. for demo-b 100000
+    gene_end_point = int(sys.argv[3]) # the endpoint of gene name-based data collection for demo-b 50
+    paths = str(sys.argv[4]) + '/'
+elif len(sys.argv)==3:
+    word_query = str(sys.argv[1])
+    word_end_point = 0; gene_end_point = 0  # 0 means: collect everything available
+    paths = str(sys.argv[2]) + '/'
+     
+data_dir = os.path.abspath(os.getcwd())
+output_dir = os.path.join(data_dir, paths + 'baseline_doc')
+document_output_dir = os.path.join(data_dir, paths + 'gene2document') 
+pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True)
+pathlib.Path(document_output_dir).mkdir(parents=True, exist_ok=True)
+email = "lrmercadod@gmail.com"  # Replace with your valid email address
+api_key = "19bea34a4dbdbc6ef30392cee15943365309"
+ld = literature_data_collection(email, output_dir, document_output_dir, api_key=api_key)
+
+########### word query based literature data collection ################# 
+gap=10000
+batch = 1000
+w2d_starting_point = 0  
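+# `gap` sets how many abstracts each collection chunk covers; `batch` is the
+# per-request fetch size (assumption: both are consumed inside
+# literature_data_collection's download loop).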
+
+search_results, _word_end_point = ld.word_based_query_fit(year = years, user_term=word_query)
+print('The number of available abstracts:', _word_end_point, 'for', word_query)
+
+if word_end_point == 0:  # 0 means: collect everything available
+    word_end_point = _word_end_point
+ld.collecting_doc_using_word_based_query(year = years, user_term=word_query, gap = gap, starting = gap*w2d_starting_point, ixs = w2d_starting_point, test_end_point=word_end_point)
+
+########### gene name-query based literature data collection ################# 
+query_full=ld.text_open('./data/gene_name_info/query_full_name.txt')
+query_symbol=ld.text_open('./data/gene_name_info/query_symbol.txt') # gene name list
+
+query_size = len(query_full)
+ld.gene_based_query_fit(query_size, query_full, query_symbol) # setting up
+
+g2d_starting_point = 0
+batch_size = 100
+#############################
+if gene_end_point == 0:  # 0 means: collect over the full gene list
+    gene_end_point = round(query_size/batch_size)
+ld.collecting_doc_using_gene_based_query(year = years, batch_size = batch_size, starting = g2d_starting_point, query_len=len(query_full), end_point = gene_end_point)
diff --git a/code/step1_data_collection_Custom_Luis.py b/code/step1_data_collection_Custom_Luis.py
new file mode 100644
index 0000000..6f86893
--- /dev/null
+++ b/code/step1_data_collection_Custom_Luis.py
@@ -0,0 +1,76 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Sun Jun 21 00:16:25 2020
+python -u "/mnt/c/Users/lrm22005/OneDrive - University of Connecticut/Research/ZIP11_Bioinformatic/capsule-3642152/code/step1_data_collection.py" 'zinc' 0 0 './results/zinc'
+"""
+
+import os
+import pathlib
+import sys
+import time
+import urllib.error
+
+sys.path.append('lib')
+from lib.Literature_Data_Collection import literature_data_collection
+
+if len(sys.argv) > 3:
+    word_query = str(sys.argv[1])
+    word_end_point = int(sys.argv[2])  # the endpoint of a word-based data collection. for demo-b 100000
+    gene_end_point = int(sys.argv[3])  # the endpoint of gene name-based data collection for demo-b 50
+    paths = str(sys.argv[4]) + '/'
+elif len(sys.argv) == 3:
+    word_query = str(sys.argv[1])
+    word_end_point = 0; gene_end_point = 0  # 0 means: collect everything available
+    paths = str(sys.argv[2]) + '/'
+
+data_dir = os.path.abspath(os.getcwd())
+output_dir = os.path.join(data_dir, paths + 'baseline_doc')
+document_output_dir = os.path.join(data_dir, paths + 'gene2document')
+pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True)
+pathlib.Path(document_output_dir).mkdir(parents=True, exist_ok=True)
+
+email = "lrmercadod@gmail.com"  # Replace with your valid email address
+api_key = "19bea34a4dbdbc6ef30392cee15943365309"
+ld = literature_data_collection(email, output_dir, document_output_dir, api_key=api_key)
+
+# setting up
+########### word query based literature data collection #################
+gap = 1000
+batch = 200
+w2d_starting_point = 0
+
+try:
+    search_results, _word_end_point = ld.word_based_query_fit(year=None, user_term=word_query)
+    print('The number of available abstracts:', _word_end_point, 'for', word_query)
+    
+    if word_end_point == 0:  # 0 means: collect everything available
+        word_end_point = _word_end_point
+    
+    ld.collecting_doc_using_word_based_query(year=None, user_term=word_query, gap=gap, starting=gap*w2d_starting_point,
+                                             ixs=w2d_starting_point, test_end_point=word_end_point)
+except urllib.error.HTTPError as e:
+    print(f"An HTTP error occurred: {e}")
+    print("Retrying in 5 seconds...")
+    time.sleep(5)
+    # Retry the request or handle the error appropriately
+
+########### gene name-query based literature data collection #################
+query_full = ld.text_open('./data/gene_name_info/query_full_name.txt')
+query_symbol = ld.text_open('./data/gene_name_info/query_symbol.txt')
+# gene name list
+query_size = len(query_full)
+ld.gene_based_query_fit(query_size, query_full, query_symbol)  # setting up
+
+g2d_starting_point = 0
+batch_size = 10
+
+############################
+if gene_end_point == 0:  # 0 means: collect over the full gene list
+    gene_end_point = round(query_size / batch_size)
+
+ld.collecting_doc_using_gene_based_query(year=None, batch_size=batch_size, starting=g2d_starting_point,
+                                         query_len=len(query_full), end_point=gene_end_point)
\ No newline at end of file
diff --git a/code/step2_data_preprocessing.py b/code/step2_data_preprocessing.py
new file mode 100644
index 0000000..9538008
--- /dev/null
+++ b/code/step2_data_preprocessing.py
@@ -0,0 +1,63 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Sun Jun 21 00:16:25 2020
+
+@author: Jihye Moon
+
+"""
+
+import os
+import pathlib
+import sys
+
+sys.path.append('lib')  
+import lib.Literature_Data_Preprocessing as ldp
+
+base = sys.argv[1]
+output = sys.argv[2]
+batch_dir = base # os.path.join(base, 'literature_data')
+comb_dir = os.path.join(base, 'arranged')
+preprocessed_dir = os.path.join(output, 'preprocessed')
+pathlib.Path(comb_dir).mkdir(parents=True, exist_ok=True)
+pathlib.Path(preprocessed_dir).mkdir(parents=True, exist_ok=True)
+ 
+lp=ldp.preprocessing(base, batch_dir, comb_dir, preprocessed_dir) 
+
+### Extracting only abstracts and combining all collected files into one file (Gene name based documents)
+file_names, data_list=lp.batch_data_matching(batch_dir, ['gene2document'])
+arr_list = lp.combining_files(file_names, data_list, ['FullText'], 3)
+
+for i in range(len(file_names)):
+    lp.Indexing(os.path.join(comb_dir, file_names[i]), arr_list[file_names[i]])
+    
+gene2doc = lp.gene2doc_mapping(arr_list[file_names[0]])
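+# gene2doc maps each gene symbol to the abstracts collected for it in step 1
+# (the gene2document output); it is used below to build gene-based documents.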
+ 
+
+### Extracting only abstracts and combining all collected files into one file (Word name based documents)
+file_names_doc, data_list_doc = lp.batch_data_matching(batch_dir, ['baseline_doc'])
+arr_list2 = lp.combining_query2doc(file_names_doc, data_list_doc, ['pubmed'], 4) 
+
+
+### Literature Data Preprocessing
+total_size=len(arr_list2[file_names_doc[0]])
+full_handle = open(os.path.join(comb_dir, file_names_doc[0]+'.FullText.txt'), "w")
+meta_handle = open(os.path.join(comb_dir, file_names_doc[0]+'.meta.txt'), "w")
+
+total_FullText=[]
+for i in range(total_size):
+    FullText, Meta = lp.Medine_mapping(arr_list2[file_names_doc[0]][i]) 
+    #print(i, '/', total_size, round(i/total_size,2)*100)
+    total_FullText.append(FullText)
+    full_handle.write(FullText)
+    meta_handle.write(Meta)
+full_handle.close()
+meta_handle.close()
+
+doc_gene=list(gene2doc.keys())
+
+print('----- preprocessing --- for gene name based documents')
+lp.making_doc_data(doc_gene, file_names[0], gene2doc) 
+
+print('----- preprocessing --- for word name based documents')
+lp.making_doc_data(None, file_names_doc[0], total_FullText)
diff --git a/code/step3_literature_embedding_training.py b/code/step3_literature_embedding_training.py
new file mode 100644
index 0000000..56af086
--- /dev/null
+++ b/code/step3_literature_embedding_training.py
@@ -0,0 +1,57 @@
+"""
+Created on Sun Jun 21 00:16:25 2020
+
+@author: Jihye Moon
+
+"""
+ 
+import pathlib
+import sys
+sys.path.append('lib')  
+import Building_Literature_Embedding_Model as edg
+
+window_size = 2
+min_count = 5
+min_size = 2
+dimension = 128
+num_sampled = 16
+batch_size = 564 #256
+epoch = 10
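+# Skip-gram-style hyperparameters: `dimension` is the embedding size and
+# `num_sampled` the negative-sample count for the NCE loss; `min_count` and
+# `min_size` correspond to the vocabulary filters (minimum frequency / token length).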
+
+root_path = sys.argv[1]
+epoch = int(sys.argv[2])
+output = sys.argv[3]
+
+vocab_dir = output + '/vocab/'
+preprocessed_path = root_path + '/preprocessed'
+model_path = output 
+logs_dir = vocab_dir+'/logs'
+gene2doc_dir = logs_dir+'/gene2doc'
+baseline_doc_dir = logs_dir+'/baseline_doc'
+
+pathlib.Path(logs_dir).mkdir(parents=True, exist_ok=True)
+pathlib.Path(gene2doc_dir).mkdir(parents=True, exist_ok=True)
+pathlib.Path(baseline_doc_dir).mkdir(parents=True, exist_ok=True)
+pathlib.Path(model_path).mkdir(parents=True, exist_ok=True)
+
+print("==== Generating Training Data for Literature Embedding Model")
+eg=edg.building_embedding_model()
+eg.setting(preprocessed_path, vocab_dir, logs_dir, gene2doc_dir, baseline_doc_dir)
+
+print("==== Creating Vocabulary ===")
+eg.creating_vocab()
+
+print("=== Checking If Data Generation Is Correct ===")
+eg.checking_gene2doc_generation(window_size)
+print("=== Creating Training Data For Fig. 3(a) and (b) in our paper ===")
+eg.creating_training_data_for_gene2doc(window_size)
+
+print("=== Creating Training Data For Fig. 2 in our paper ===")
+eg.creating_training_data_for_word2doc(window_size)
+
+print("=== Starting Model Training For Figs.2-3 ===")
+eg.model_setting(dimension=dimension, num_sampled=num_sampled)
+eg.starting_sorting(model_path)
+eg.model_training(epoch=epoch, batch_size=batch_size)
+
+
diff --git a/code/step4_CVD_risk_factor_identification.py b/code/step4_CVD_risk_factor_identification.py
new file mode 100644
index 0000000..e597a8c
--- /dev/null
+++ b/code/step4_CVD_risk_factor_identification.py
@@ -0,0 +1,35 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Aug 25 16:46:07 2022
+
+@ Journal: Expert Systems With Applications
+@ Title: A Literature Embedding Model for Cardiovascular Disease Prediction using Risk Factors, Symptoms, and Genotype Information
+@ Accepted Date: Aug. 24, 2024
+@ Author: Jihye Moon, Hugo F. Posada-Quintero, and *Ki. H. Chon
+@ Contact Email: jihye.moon@uconn.edu 
+
+""" 
+import pathlib
+import sys
+
+sys.path.append('lib')
+import lib.CVD_risk_factor_search as ie
+
+model = ie.run_intrisic_evaluation()
+
+model_path = str(sys.argv[1])   # e.g., '../data/old_model'
+output_path = str(sys.argv[2])  # e.g., '../results/demo_a'
+
+queries = ['Zn Transport', 'protein names', 'drug interactions', 'cancer', 'drug names', 'protein drug', 'zinc', 'zn pathway']
+TOPNUM = 25
+pathlib.Path(output_path).mkdir(parents=True, exist_ok=True)
+model.setting(path=model_path, gene_symb='../data/gene_name_info/query_symbol')
+
+for query in queries:
+    model.running(query, output_path, TOPNUM)
+
diff --git a/code/step_1_data_collection_Luis.py b/code/step_1_data_collection_Luis.py
new file mode 100644
index 0000000..0ab1f4e
--- /dev/null
+++ b/code/step_1_data_collection_Luis.py
@@ -0,0 +1,122 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Sun Jun 21 00:16:25 2020
+Updated to include robust retry mechanism and API rate limiting
+"""
+
+import os
+import pathlib
+import sys
+import time
+import urllib.error
+
+# Ensuring the correct append path for 'lib'
+sys.path.append(os.path.join(os.path.abspath(os.getcwd()), 'lib'))
+from lib.Loading_PudMed import ids_pudmed as pudmed
+
+class literature_data_collection:
+    def __init__(self, email, output_dir, document_output_dir, api_key=None):
+        self.output_dir = output_dir
+        self.document_output_dir = document_output_dir
+        self.email = email
+        self.api_key = api_key
+        print("Initialized literature_data_collection with email: {}".format(email))
+
+    def text_open(self, path):
+        with open(path, 'r') as f:
+            data = f.read().strip().split('\n')
+        return data
+
+    def word_based_query_fit(self, year=None, user_term="heart"):
+        pud = pudmed()
+        print("Created pudmed instance for searching.")
+        search_results, end_point = pud.search_list(user_term, year, self.email)
+        return search_results, end_point
+
+    def collecting_doc_using_word_based_query(self, year=None, user_term="heart", gap=50000, starting=0, ixs=0, test_end_point=0):
+        pud = pudmed()
+        print("Collecting documents using word-based query.")
+        search_results, end_point = pud.search_list(user_term, year, self.email)
+        if test_end_point != 0:
+            end_point = test_end_point
+        print('Checking data collection performance --- collecting until', end_point, 'documents')
+        next_start = starting
+        for ix in range(ixs, round(end_point/gap) + 1):
+            next_start = self.robust_request(ix, gap, next_start, end_point, 10000, pud, search_results)
+            if next_start >= end_point:
+                break
+
+    def robust_request(self, ix, gap, starting, end_point, batch, pud, search_results):
+        success = False
+        attempts = 0
+        while not success and attempts < 5:
+            try:
+                print(f"{ix} / {end_point // gap} | from {starting} to {min(starting + gap, end_point)}")
+                pud.search_full(ix, self.output_dir, search_results, starting, min(starting + gap, end_point), batch)
+                success = True
+            except urllib.error.HTTPError as e:
+                attempts += 1
+                wait_time = 2 ** attempts
+                print(f"An HTTP error occurred: {e}")
+                print(f"Retrying in {wait_time} seconds...")
+                time.sleep(wait_time)
+
+        if not success:
+            print("Failed after 5 attempts, skipping this batch.")
+        return starting + gap  # Returns the next starting point
+
+if __name__ == "__main__":
+    if len(sys.argv) > 3:
+        word_query = str(sys.argv[1])
+        word_end_point = int(sys.argv[2])
+        gene_end_point = int(sys.argv[3])
+        paths = str(sys.argv[4]) + '/'
+    elif len(sys.argv) == 3:
+        word_query = str(sys.argv[1])
+        word_end_point = 0; gene_end_point = 0  # 0 means: collect everything available
+        paths = str(sys.argv[2]) + '/'
+
+    data_dir = os.path.abspath(os.getcwd())
+    output_dir = os.path.join(data_dir, paths + 'baseline_doc')
+    document_output_dir = os.path.join(data_dir, paths + 'gene2document')
+    os.makedirs(output_dir, exist_ok=True)
+    os.makedirs(document_output_dir, exist_ok=True)
+
+    email = "lrmercadod@gmail.com"  # Replace with your valid email address
+    api_key = "19bea34a4dbdbc6ef30392cee15943365309"
+    ld = literature_data_collection(email, output_dir, document_output_dir, api_key=api_key)
+
+    gap = 50000  # Adjust as needed
+    batch = 10000  # Adjust as needed
+    w2d_starting_point = 0  # Adjust if resuming from a different point
+
+    try:
+        search_results, _word_end_point = ld.word_based_query_fit(year=None, user_term=word_query)
+        print('The number of available abstracts:', _word_end_point, 'for', word_query)
+
+        if word_end_point == 0:  # 0 means: collect everything available
+            word_end_point = _word_end_point
+
+        ld.collecting_doc_using_word_based_query(year=None, user_term=word_query, gap=gap, starting=gap*w2d_starting_point, ixs=w2d_starting_point, test_end_point=word_end_point)
+    except urllib.error.HTTPError as e:
+        print(f"An HTTP error occurred: {e}")
+        print("Retrying in 5 seconds...")
+        time.sleep(5)
+
+    # Assuming gene data is prepared and ready to be processed
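+    # Note: gene_based_query_fit and collecting_doc_using_gene_based_query are
+    # defined on the full lib.Literature_Data_Collection class, not on the
+    # trimmed class above (which shadows the import), so this block only
+    # succeeds with the library class; failures land in the except below.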
+    try:
+        query_full = ld.text_open('data/gene_name_info/query_full_name.txt')  # Adjust path as necessary
+        query_symbol = ld.text_open('data/gene_name_info/query_symbol.txt')  # Adjust path as necessary
+        query_size = len(query_full)
+        ld.gene_based_query_fit(query_size, query_full, query_symbol)
+
+        g2d_starting_point = 0
+        batch_size = 10
+        gene_end_point = round(query_size / batch_size)
+        if len(sys.argv) > 3 and int(sys.argv[3]) != 0:
+            gene_end_point = int(sys.argv[3])
+
+        ld.collecting_doc_using_gene_based_query(year=None, batch_size=batch_size, starting=g2d_starting_point, query_len=query_size, end_point=gene_end_point)
+    except Exception as e:
+        print(f"Error during gene-based data collection: {e}")
diff --git a/code/step_1_data_collection_Luis_.py b/code/step_1_data_collection_Luis_.py
new file mode 100644
index 0000000..4e313c0
--- /dev/null
+++ b/code/step_1_data_collection_Luis_.py
@@ -0,0 +1,21 @@
+from Bio import Entrez
+import time
+
+def download_data(query, batch_size=1000, delay=1):
+    Entrez.email = "your.email@example.com"  # NCBI requires a contact email
+    handle = Entrez.esearch(db="pubmed", term=query, retmax=1000000)
+    record = Entrez.read(handle)
+    handle.close()
+    ids = record["IdList"]
+    total = len(ids)
+    print(f"Total number of records: {total}")
+    for i in range(0, total, batch_size):
+        print(f"Downloading records {i+1}-{min(i+batch_size, total)}")
+        ids_batch = ids[i:i+batch_size]
+        handle = Entrez.efetch(db="pubmed", id=",".join(ids_batch), rettype="medline", retmode="text")
+        data = handle.read()
+        # Do something with the data, e.g., save it to a file
+        with open("data.txt", "a", encoding='utf-8') as f:
+            f.write(data)
+        handle.close()
+        time.sleep(delay)
+        
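+# NCBI E-utilities allow roughly 3 requests/second without an API key
+# (per NCBI usage guidance), so keep `delay` >= 1 for large downloads.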
+download_data("zinc")
\ No newline at end of file
diff --git a/environment/Dockerfile b/environment/Dockerfile
new file mode 100644
index 0000000..89eaa02
--- /dev/null
+++ b/environment/Dockerfile
@@ -0,0 +1,7 @@
+# hash:sha256:e465d6106ff500ccbaa462a26e7c4b6ff7aade26df19638552849815bd95e8dc
+FROM registry.codeocean.com/codeocean/tensorflow:1.4.0-python3.5.2-cuda8.0.61-cudnn6.0.21-ubuntu16.04
+
+ARG DEBIAN_FRONTEND=noninteractive
+
+RUN pip3 install -U --no-cache-dir --upgrade-strategy=only-if-needed \
+    nltk==3.6.2
diff --git a/error_log.txt b/error_log.txt
new file mode 100644
index 0000000..d944cb9
--- /dev/null
+++ b/error_log.txt
@@ -0,0 +1,371 @@
+Error writing SNP ID 1401488244 for gene ID 100134391: [Errno 22] Invalid argument
+Error writing SNP ID 1321762266 for gene ID 100134391: [Errno 22] Invalid argument
+Error writing SNP ID 1242721601 for gene ID 100134391: [Errno 22] Invalid argument
+Error writing SNP ID 1162358699 for gene ID 100134391: [Errno 22] Invalid argument
+Error writing SNP ID 989444980 for gene ID 100134391: [Errno 22] Invalid argument
+Error writing SNP ID 908631462 for gene ID 100134391: [Errno 22] Invalid argument
+Error writing SNP ID 555494967 for gene ID 100134391: [Errno 22] Invalid argument
+Error writing SNP ID 67190938 for gene ID 100134391: [Errno 22] Invalid argument
+Error writing SNP ID 1454934271 for gene ID 100133920: [Errno 22] Invalid argument
+Error writing SNP ID 1378719306 for gene ID 100133920: [Errno 22] Invalid argument
+Error writing SNP ID 1304374560 for gene ID 100133920: [Errno 22] Invalid argument
+Error writing SNP ID 1228792142 for gene ID 100133920: [Errno 22] Invalid argument
+Error writing SNP ID 3055566 for gene ID 100133920: [Errno 22] Invalid argument
+Error writing SNP ID 1377065813 for gene ID 100133331: [Errno 22] Invalid argument
+Error writing SNP ID 1203997153 for gene ID 100133331: [Errno 22] Invalid argument
+Error writing SNP ID 1475953306 for gene ID 100133077: [Errno 22] Invalid argument
+Error writing SNP ID 1422174919 for gene ID 100133077: [Errno 22] Invalid argument
+Error writing SNP ID 1367196304 for gene ID 100133077: [Errno 22] Invalid argument
+Error writing SNP ID 1312430718 for gene ID 100133077: [Errno 22] Invalid argument
+Error writing SNP ID 1254929479 for gene ID 100133077: [Errno 22] Invalid argument
+Error writing SNP ID 1198189379 for gene ID 100133077: [Errno 22] Invalid argument
+Error writing SNP ID 1035939999 for gene ID 100133077: [Errno 22] Invalid argument
+Error writing SNP ID 943090697 for gene ID 100133077: [Errno 22] Invalid argument
+Error writing SNP ID 761888488 for gene ID 100133077: [Errno 22] Invalid argument
+Error writing SNP ID 537149284 for gene ID 100133077: [Errno 22] Invalid argument
+Error writing SNP ID 6559249 for gene ID 100133077: [Errno 22] Invalid argument
+Error writing SNP ID 1459552283 for gene ID 100131472: [Errno 22] Invalid argument
+Error writing SNP ID 1215552070 for gene ID 100131472: [Errno 22] Invalid argument
+Error writing SNP ID 190355052 for gene ID 100131472: [Errno 22] Invalid argument
+Error writing SNP ID 1317791784 for gene ID 100131372: [Errno 22] Invalid argument
+Error writing SNP ID 1032440441 for gene ID 100131372: [Errno 22] Invalid argument
+Error writing SNP ID 774350025 for gene ID 100131372: [Errno 22] Invalid argument
+Error writing SNP ID 1403715705 for gene ID 100131289: [Errno 22] Invalid argument
+Error writing SNP ID 13192480 for gene ID 100131289: [Errno 22] Invalid argument
+Error writing SNP ID 1411086768 for gene ID 100131257: [Errno 22] Invalid argument
+Error writing SNP ID 1331412871 for gene ID 100131257: [Errno 22] Invalid argument
+Error writing SNP ID 1250795715 for gene ID 100131257: [Errno 22] Invalid argument
+Error writing SNP ID 1166693659 for gene ID 100131257: [Errno 22] Invalid argument
+Error writing SNP ID 985746876 for gene ID 100131257: [Errno 22] Invalid argument
+Error writing SNP ID 897042239 for gene ID 100131257: [Errno 22] Invalid argument
+Error writing SNP ID 549557861 for gene ID 100131257: [Errno 22] Invalid argument
+Error writing SNP ID 6463645 for gene ID 100131257: [Errno 22] Invalid argument
+Error writing SNP ID 1162482167 for gene ID 100131096: [Errno 22] Invalid argument
+Error writing SNP ID 569206848 for gene ID 100131096: [Errno 22] Invalid argument
+Error writing SNP ID 1440931989 for gene ID 100130964: [Errno 22] Invalid argument
+Error writing SNP ID 1331485874 for gene ID 100130964: [Errno 22] Invalid argument
+Error writing SNP ID 1218107497 for gene ID 100130964: [Errno 22] Invalid argument
+Error writing SNP ID 1015952163 for gene ID 100130964: [Errno 22] Invalid argument
+Error writing SNP ID 905187583 for gene ID 100130964: [Errno 22] Invalid argument
+Error writing SNP ID 187544859 for gene ID 100130964: [Errno 22] Invalid argument
+Error writing SNP ID 1403512839 for gene ID 100130876: [Errno 22] Invalid argument
+Error writing SNP ID 1160588349 for gene ID 100130876: [Errno 22] Invalid argument
+Error writing SNP ID 58269148 for gene ID 100130876: [Errno 22] Invalid argument
+Error writing SNP ID 1224558074 for gene ID 100130744: [Errno 22] Invalid argument
+Error writing SNP ID 761414982 for gene ID 100130744: [Errno 22] Invalid argument
+Error writing SNP ID 1434312909 for gene ID 100130698: [Errno 22] Invalid argument
+Error writing SNP ID 1266168830 for gene ID 100130698: [Errno 22] Invalid argument
+Error writing SNP ID 950291325 for gene ID 100130698: [Errno 22] Invalid argument
+Error writing SNP ID 1444085150 for gene ID 100130673: [Errno 22] Invalid argument
+Error writing SNP ID 535193915 for gene ID 100130673: [Errno 22] Invalid argument
+Error writing SNP ID 1395244138 for gene ID 100130587: [Errno 22] Invalid argument
+Error writing SNP ID 1276465865 for gene ID 100130587: [Errno 22] Invalid argument
+Error writing SNP ID 1054734281 for gene ID 100130587: [Errno 22] Invalid argument
+Error writing SNP ID 949279994 for gene ID 100130587: [Errno 22] Invalid argument
+Error writing SNP ID 567692067 for gene ID 100130587: [Errno 22] Invalid argument
+Error writing SNP ID 1452446245 for gene ID 100130502: [Errno 22] Invalid argument
+Error writing SNP ID 948845085 for gene ID 100130502: [Errno 22] Invalid argument
+Error writing SNP ID 1427296645 for gene ID 100130452: [Errno 22] Invalid argument
+Error writing SNP ID 1292396931 for gene ID 100130452: [Errno 22] Invalid argument
+Error writing SNP ID 1054257971 for gene ID 100130452: [Errno 22] Invalid argument
+Error writing SNP ID 924124821 for gene ID 100130452: [Errno 22] Invalid argument
+Error writing SNP ID 386654020 for gene ID 100130452: [Errno 22] Invalid argument
+Error writing SNP ID 1473991555 for gene ID 100130331: [Errno 22] Invalid argument
+Error writing SNP ID 1444752669 for gene ID 100130331: [Errno 22] Invalid argument
+Error writing SNP ID 1415506922 for gene ID 100130331: [Errno 22] Invalid argument
+Error writing SNP ID 1387168159 for gene ID 100130331: [Errno 22] Invalid argument
+Error writing SNP ID 1359171059 for gene ID 100130331: [Errno 22] Invalid argument
+Error writing SNP ID 1331689879 for gene ID 100130331: [Errno 22] Invalid argument
+Error writing SNP ID 1303506748 for gene ID 100130331: [Errno 22] Invalid argument
+Error writing SNP ID 1275744506 for gene ID 100130331: [Errno 22] Invalid argument
+Error writing SNP ID 1247356556 for gene ID 100130331: [Errno 22] Invalid argument
+Error writing SNP ID 1217490966 for gene ID 100130331: [Errno 22] Invalid argument
+Error writing SNP ID 1189421478 for gene ID 100130331: [Errno 22] Invalid argument
+Error writing SNP ID 1159376449 for gene ID 100130331: [Errno 22] Invalid argument
+Error writing SNP ID 1031959707 for gene ID 100130331: [Errno 22] Invalid argument
+Error writing SNP ID 1004368436 for gene ID 100130331: [Errno 22] Invalid argument
+Error writing SNP ID 973227273 for gene ID 100130331: [Errno 22] Invalid argument
+Error writing SNP ID 943672268 for gene ID 100130331: [Errno 22] Invalid argument
+Error writing SNP ID 914646414 for gene ID 100130331: [Errno 22] Invalid argument
+Error writing SNP ID 868593534 for gene ID 100130331: [Errno 22] Invalid argument
+Error writing SNP ID 761427066 for gene ID 100130331: [Errno 22] Invalid argument
+Error writing SNP ID 568918206 for gene ID 100130331: [Errno 22] Invalid argument
+Error writing SNP ID 540705553 for gene ID 100130331: [Errno 22] Invalid argument
+Error writing SNP ID 191753980 for gene ID 100130331: [Errno 22] Invalid argument
+Error writing SNP ID 113244480 for gene ID 100130331: [Errno 22] Invalid argument
+Error writing SNP ID 1469723439 for gene ID 100130283: [Errno 22] Invalid argument
+Error writing SNP ID 1383298658 for gene ID 100130283: [Errno 22] Invalid argument
+Error writing SNP ID 1301222265 for gene ID 100130283: [Errno 22] Invalid argument
+Error writing SNP ID 1219936237 for gene ID 100130283: [Errno 22] Invalid argument
+Error writing SNP ID 1039151310 for gene ID 100130283: [Errno 22] Invalid argument
+Error writing SNP ID 958735062 for gene ID 100130283: [Errno 22] Invalid argument
+Error writing SNP ID 776464949 for gene ID 100130283: [Errno 22] Invalid argument
+Error writing SNP ID 547097852 for gene ID 100130283: [Errno 22] Invalid argument
+Error writing SNP ID 11862298 for gene ID 100130283: [Errno 22] Invalid argument
+Error writing SNP ID 1467965535 for gene ID 100130256: [Errno 22] Invalid argument
+Error writing SNP ID 1440863038 for gene ID 100130256: [Errno 22] Invalid argument
+Error writing SNP ID 1411124420 for gene ID 100130256: [Errno 22] Invalid argument
+Error writing SNP ID 1384161481 for gene ID 100130256: [Errno 22] Invalid argument
+Error writing SNP ID 1356135859 for gene ID 100130256: [Errno 22] Invalid argument
+Error writing SNP ID 1328668354 for gene ID 100130256: [Errno 22] Invalid argument
+Error writing SNP ID 1300706364 for gene ID 100130256: [Errno 22] Invalid argument
+Error writing SNP ID 1274407944 for gene ID 100130256: [Errno 22] Invalid argument
+Error writing SNP ID 1244911549 for gene ID 100130256: [Errno 22] Invalid argument
+Error writing SNP ID 1216192390 for gene ID 100130256: [Errno 22] Invalid argument
+Error writing SNP ID 1188007456 for gene ID 100130256: [Errno 22] Invalid argument
+Error writing SNP ID 1160028300 for gene ID 100130256: [Errno 22] Invalid argument
+Error writing SNP ID 1034054128 for gene ID 100130256: [Errno 22] Invalid argument
+Error writing SNP ID 1007664231 for gene ID 100130256: [Errno 22] Invalid argument
+Error writing SNP ID 979232284 for gene ID 100130256: [Errno 22] Invalid argument
+Error writing SNP ID 949923982 for gene ID 100130256: [Errno 22] Invalid argument
+Error writing SNP ID 920153574 for gene ID 100130256: [Errno 22] Invalid argument
+Error writing SNP ID 890526906 for gene ID 100130256: [Errno 22] Invalid argument
+Error writing SNP ID 756917908 for gene ID 100130256: [Errno 22] Invalid argument
+Error writing SNP ID 559118170 for gene ID 100130256: [Errno 22] Invalid argument
+Error writing SNP ID 529478619 for gene ID 100130256: [Errno 22] Invalid argument
+Error writing SNP ID 180920765 for gene ID 100130256: [Errno 22] Invalid argument
+Error writing SNP ID 6724403 for gene ID 100130256: [Errno 22] Invalid argument
+Error writing SNP ID 953022923 for gene ID 100130083: [Errno 22] Invalid argument
+Error writing SNP ID 1467821831 for gene ID 100129931: [Errno 22] Invalid argument
+Error writing SNP ID 1367706192 for gene ID 100129931: [Errno 22] Invalid argument
+Error writing SNP ID 1269846634 for gene ID 100129931: [Errno 22] Invalid argument
+Error writing SNP ID 1170898671 for gene ID 100129931: [Errno 22] Invalid argument
+Error writing SNP ID 979696164 for gene ID 100129931: [Errno 22] Invalid argument
+Error writing SNP ID 867916940 for gene ID 100129931: [Errno 22] Invalid argument
+Error writing SNP ID 547973127 for gene ID 100129931: [Errno 22] Invalid argument
+Error writing SNP ID 1468230327 for gene ID 100129697: [Errno 22] Invalid argument
+Error writing SNP ID 1360086656 for gene ID 100129697: [Errno 22] Invalid argument
+Error writing SNP ID 1250191056 for gene ID 100129697: [Errno 22] Invalid argument
+Error writing SNP ID 1039629136 for gene ID 100129697: [Errno 22] Invalid argument
+Error writing SNP ID 937306662 for gene ID 100129697: [Errno 22] Invalid argument
+Error writing SNP ID 555779068 for gene ID 100129697: [Errno 22] Invalid argument
+Error writing SNP ID 1415899090 for gene ID 100129503: [Errno 22] Invalid argument
+Error writing SNP ID 943081523 for gene ID 100129503: [Errno 22] Invalid argument
+Error writing SNP ID 1295225460 for gene ID 100129476: [Errno 22] Invalid argument
+Error writing SNP ID 191836561 for gene ID 100129476: [Errno 22] Invalid argument
+Error writing SNP ID 943272702 for gene ID 100129473: [Errno 22] Invalid argument
+Error writing SNP ID 1405799168 for gene ID 100129316: [Errno 22] Invalid argument
+Error writing SNP ID 1246006261 for gene ID 100129316: [Errno 22] Invalid argument
+Error writing SNP ID 1005913565 for gene ID 100129316: [Errno 22] Invalid argument
+Error writing SNP ID 756408635 for gene ID 100129316: [Errno 22] Invalid argument
+Error writing SNP ID 1451479264 for gene ID 100129215: [Errno 22] Invalid argument
+Error writing SNP ID 1219896618 for gene ID 100129215: [Errno 22] Invalid argument
+Error writing SNP ID 902211470 for gene ID 100129215: [Errno 22] Invalid argument
+Error writing SNP ID 1455535940 for gene ID 100129098: [Errno 22] Invalid argument
+Error writing SNP ID 1236509623 for gene ID 100129098: [Errno 22] Invalid argument
+Error writing SNP ID 911093945 for gene ID 100129098: [Errno 22] Invalid argument
+Error writing SNP ID 1397498615 for gene ID 100128818: [Errno 22] Invalid argument
+Error writing SNP ID 903527749 for gene ID 100128818: [Errno 22] Invalid argument
+Error writing SNP ID 1386379492 for gene ID 100128770: [Errno 22] Invalid argument
+Error writing SNP ID 1208191345 for gene ID 100128770: [Errno 22] Invalid argument
+Error writing SNP ID 932246457 for gene ID 100128770: [Errno 22] Invalid argument
+Error writing SNP ID 201207482 for gene ID 100128770: [Errno 22] Invalid argument
+Error writing SNP ID 1332246126 for gene ID 100128593: [Errno 22] Invalid argument
+Error writing SNP ID 974348888 for gene ID 100128593: [Errno 22] Invalid argument
+Error writing SNP ID 112799661 for gene ID 100128593: [Errno 22] Invalid argument
+Error writing SNP ID 941742804 for gene ID 100128573: [Errno 22] Invalid argument
+Error writing SNP ID 1381978137 for gene ID 100128494: [Errno 22] Invalid argument
+Error writing SNP ID 1166093305 for gene ID 100128494: [Errno 22] Invalid argument
+Error writing SNP ID 762463840 for gene ID 100128494: [Errno 22] Invalid argument
+Error writing SNP ID 75048709 for gene ID 100128494: [Errno 22] Invalid argument
+Error writing SNP ID 1300239219 for gene ID 100128364: [Errno 22] Invalid argument
+Error writing SNP ID 879683159 for gene ID 100128364: [Errno 22] Invalid argument
+Error writing SNP ID 1467425305 for gene ID 100128340: [Errno 22] Invalid argument
+Error writing SNP ID 1414148569 for gene ID 100128340: [Errno 22] Invalid argument
+Error writing SNP ID 1357698689 for gene ID 100128340: [Errno 22] Invalid argument
+Error writing SNP ID 1302973516 for gene ID 100128340: [Errno 22] Invalid argument
+Error writing SNP ID 1245679701 for gene ID 100128340: [Errno 22] Invalid argument
+Error writing SNP ID 1193041929 for gene ID 100128340: [Errno 22] Invalid argument
+Error writing SNP ID 1019490115 for gene ID 100128340: [Errno 22] Invalid argument
+Error writing SNP ID 899338912 for gene ID 100128340: [Errno 22] Invalid argument
+Error writing SNP ID 535310734 for gene ID 100128340: [Errno 22] Invalid argument
+Error writing SNP ID 4958990 for gene ID 100128340: [Errno 22] Invalid argument
+Error writing SNP ID 1377015885 for gene ID 100128338: [Errno 22] Invalid argument
+Error writing SNP ID 1266055812 for gene ID 100128338: [Errno 22] Invalid argument
+Error writing SNP ID 1053294258 for gene ID 100128338: [Errno 22] Invalid argument
+Error writing SNP ID 782582963 for gene ID 100128338: [Errno 22] Invalid argument
+Error writing SNP ID 11783919 for gene ID 100128338: [Errno 22] Invalid argument
+Error writing SNP ID 1215258193 for gene ID 100128276: [Errno 22] Invalid argument
+Error writing SNP ID 532895802 for gene ID 100128276: [Errno 22] Invalid argument
+Error writing SNP ID 1289133122 for gene ID 100128242: [Errno 22] Invalid argument
+Error writing SNP ID 949052526 for gene ID 100128242: [Errno 22] Invalid argument
+Error writing SNP ID 1451230755 for gene ID 100128002: [Errno 22] Invalid argument
+Error writing SNP ID 1041973346 for gene ID 100128002: [Errno 22] Invalid argument
+Error writing SNP ID 1472508296 for gene ID 100126584: [Errno 22] Invalid argument
+Error writing SNP ID 1425307614 for gene ID 100126584: [Errno 22] Invalid argument
+Error writing SNP ID 1374924715 for gene ID 100126584: [Errno 22] Invalid argument
+Error writing SNP ID 1328108783 for gene ID 100126584: [Errno 22] Invalid argument
+Error writing SNP ID 1274324822 for gene ID 100126584: [Errno 22] Invalid argument
+Error writing SNP ID 1225391909 for gene ID 100126584: [Errno 22] Invalid argument
+Error writing SNP ID 1174650019 for gene ID 100126584: [Errno 22] Invalid argument
+Error writing SNP ID 1030991467 for gene ID 100126584: [Errno 22] Invalid argument
+Error writing SNP ID 985935809 for gene ID 100126584: [Errno 22] Invalid argument
+Error writing SNP ID 939018273 for gene ID 100126584: [Errno 22] Invalid argument
+Error writing SNP ID 891318435 for gene ID 100126584: [Errno 22] Invalid argument
+Error writing SNP ID 570684633 for gene ID 100126584: [Errno 22] Invalid argument
+Error writing SNP ID 368760172 for gene ID 100126584: [Errno 22] Invalid argument
+Error writing SNP ID 9482609 for gene ID 100126584: [Errno 22] Invalid argument
+Error writing SNP ID 1354561271 for gene ID 100126447: [Errno 22] Invalid argument
+Error writing SNP ID 1183159695 for gene ID 100126447: [Errno 22] Invalid argument
+Error writing SNP ID 777074097 for gene ID 100126447: [Errno 22] Invalid argument
+Error writing SNP ID 1168689953 for gene ID 100129316: [Errno 22] Invalid argument
+Error writing SNP ID 937573888 for gene ID 100129316: [Errno 22] Invalid argument
+Error writing SNP ID 368414153 for gene ID 100129316: [Errno 22] Invalid argument
+Error writing SNP ID 1340576577 for gene ID 100129215: [Errno 22] Invalid argument
+Error writing SNP ID 1013579192 for gene ID 100129215: [Errno 22] Invalid argument
+Error writing SNP ID 552165477 for gene ID 100129215: [Errno 22] Invalid argument
+Error writing SNP ID 1348791799 for gene ID 100129098: [Errno 22] Invalid argument
+Error writing SNP ID 1025875099 for gene ID 100129098: [Errno 22] Invalid argument
+Error writing SNP ID 529685715 for gene ID 100129098: [Errno 22] Invalid argument
+Error writing SNP ID 1178194412 for gene ID 100128818: [Errno 22] Invalid argument
+Error writing SNP ID 1476717529 for gene ID 100128770: [Errno 22] Invalid argument
+Error writing SNP ID 1298193237 for gene ID 100128770: [Errno 22] Invalid argument
+Error writing SNP ID 1021561998 for gene ID 100128770: [Errno 22] Invalid argument
+Error writing SNP ID 760988656 for gene ID 100128770: [Errno 22] Invalid argument
+Error writing SNP ID 1460770472 for gene ID 100128593: [Errno 22] Invalid argument
+Error writing SNP ID 1214815613 for gene ID 100128593: [Errno 22] Invalid argument
+Error writing SNP ID 759553552 for gene ID 100128593: [Errno 22] Invalid argument
+Error writing SNP ID 1276633074 for gene ID 100128573: [Errno 22] Invalid argument
+Error writing SNP ID 1168689953 for gene ID 100129316: [Errno 22] Invalid argument
+Error writing SNP ID 937573888 for gene ID 100129316: [Errno 22] Invalid argument
+Error writing SNP ID 368414153 for gene ID 100129316: [Errno 22] Invalid argument
+Error writing SNP ID 1340576577 for gene ID 100129215: [Errno 22] Invalid argument
+Error writing SNP ID 1013579192 for gene ID 100129215: [Errno 22] Invalid argument
+Error writing SNP ID 552165477 for gene ID 100129215: [Errno 22] Invalid argument
+Error writing SNP ID 1348791799 for gene ID 100129098: [Errno 22] Invalid argument
+Error writing SNP ID 1025875099 for gene ID 100129098: [Errno 22] Invalid argument
+Error writing SNP ID 529685715 for gene ID 100129098: [Errno 22] Invalid argument
+Error writing SNP ID 1178194412 for gene ID 100128818: [Errno 22] Invalid argument
+Error writing SNP ID 1476717529 for gene ID 100128770: [Errno 22] Invalid argument
+Error writing SNP ID 1298193237 for gene ID 100128770: [Errno 22] Invalid argument
+Error writing SNP ID 1021561998 for gene ID 100128770: [Errno 22] Invalid argument
+Error writing SNP ID 760988656 for gene ID 100128770: [Errno 22] Invalid argument
+Error writing SNP ID 1460770472 for gene ID 100128593: [Errno 22] Invalid argument
+Error writing SNP ID 1214815613 for gene ID 100128593: [Errno 22] Invalid argument
+Error writing SNP ID 759553552 for gene ID 100128593: [Errno 22] Invalid argument
+Error writing SNP ID 1276633074 for gene ID 100128573: [Errno 22] Invalid argument
+Error writing SNP ID 1168689953 for gene ID 100129316: [Errno 22] Invalid argument
+Error writing SNP ID 937573888 for gene ID 100129316: [Errno 22] Invalid argument
+Error writing SNP ID 368414153 for gene ID 100129316: [Errno 22] Invalid argument
+Error writing SNP ID 1340576577 for gene ID 100129215: [Errno 22] Invalid argument
+Error writing SNP ID 1013579192 for gene ID 100129215: [Errno 22] Invalid argument
+Error writing SNP ID 552165477 for gene ID 100129215: [Errno 22] Invalid argument
+Error writing SNP ID 1348791799 for gene ID 100129098: [Errno 22] Invalid argument
+Error writing SNP ID 1025875099 for gene ID 100129098: [Errno 22] Invalid argument
+Error writing SNP ID 529685715 for gene ID 100129098: [Errno 22] Invalid argument
+Error writing SNP ID 1178194412 for gene ID 100128818: [Errno 22] Invalid argument
+Error writing SNP ID 1476717529 for gene ID 100128770: [Errno 22] Invalid argument
+Error writing SNP ID 1298193237 for gene ID 100128770: [Errno 22] Invalid argument
+Error writing SNP ID 1021561998 for gene ID 100128770: [Errno 22] Invalid argument
+Error writing SNP ID 760988656 for gene ID 100128770: [Errno 22] Invalid argument
+Error writing SNP ID 1428970358 for gene ID 100128593: [Errno 22] Invalid argument
+Error writing SNP ID 1179412392 for gene ID 100128593: [Errno 22] Invalid argument
+Error writing SNP ID 573057482 for gene ID 100128593: [Errno 22] Invalid argument
+Error writing SNP ID 1212664435 for gene ID 100128573: [Errno 22] Invalid argument
+Error writing SNP ID 1465316026 for gene ID 100128494: [Errno 22] Invalid argument
+Error writing SNP ID 1250211435 for gene ID 100128494: [Errno 22] Invalid argument
+Error writing SNP ID 778620142 for gene ID 100128494: [Errno 22] Invalid argument
+Error writing SNP ID 376861788 for gene ID 100128494: [Errno 22] Invalid argument
+Error writing SNP ID 1375702769 for gene ID 100128364: [Errno 22] Invalid argument
+Error writing SNP ID 1170374501 for gene ID 100128364: [Errno 22] Invalid argument
+Error writing SNP ID 1486847565 for gene ID 100128340: [Errno 22] Invalid argument
+Error writing SNP ID 1434780581 for gene ID 100128340: [Errno 22] Invalid argument
+Error writing SNP ID 1380171432 for gene ID 100128340: [Errno 22] Invalid argument
+Error writing SNP ID 1321965948 for gene ID 100128340: [Errno 22] Invalid argument
+Error writing SNP ID 1268442373 for gene ID 100128340: [Errno 22] Invalid argument
+Error writing SNP ID 1211745332 for gene ID 100128340: [Errno 22] Invalid argument
+Error writing SNP ID 1158450146 for gene ID 100128340: [Errno 22] Invalid argument
+Error writing SNP ID 945227899 for gene ID 100128340: [Errno 22] Invalid argument
+Error writing SNP ID 562613918 for gene ID 100128340: [Errno 22] Invalid argument
+Error writing SNP ID 113812210 for gene ID 100128340: [Errno 22] Invalid argument
+Error writing SNP ID 1419983823 for gene ID 100128338: [Errno 22] Invalid argument
+Error writing SNP ID 1306603154 for gene ID 100128338: [Errno 22] Invalid argument
+Error writing SNP ID 1195941299 for gene ID 100128338: [Errno 22] Invalid argument
+Error writing SNP ID 940958709 for gene ID 100128338: [Errno 22] Invalid argument
+Error writing SNP ID 531284336 for gene ID 100128338: [Errno 22] Invalid argument
+Error writing SNP ID 1327600424 for gene ID 100128276: [Errno 22] Invalid argument
+Error writing SNP ID 914601310 for gene ID 100128276: [Errno 22] Invalid argument
+Error writing SNP ID 1388102325 for gene ID 100128242: [Errno 22] Invalid argument
+Error writing SNP ID 1046670781 for gene ID 100128242: [Errno 22] Invalid argument
+Error writing SNP ID 187067585 for gene ID 100128242: [Errno 22] Invalid argument
+Error writing SNP ID 1271519673 for gene ID 100128002: [Errno 22] Invalid argument
+Error writing SNP ID 1490479661 for gene ID 100126584: [Errno 22] Invalid argument
+Error writing SNP ID 1442506271 for gene ID 100126584: [Errno 22] Invalid argument
+Error writing SNP ID 1393092454 for gene ID 100126584: [Errno 22] Invalid argument
+Error writing SNP ID 1346134640 for gene ID 100126584: [Errno 22] Invalid argument
+Error writing SNP ID 1292731845 for gene ID 100126584: [Errno 22] Invalid argument
+Error writing SNP ID 1243645858 for gene ID 100126584: [Errno 22] Invalid argument
+Error writing SNP ID 1192693850 for gene ID 100126584: [Errno 22] Invalid argument
+Error writing SNP ID 1046293712 for gene ID 100126584: [Errno 22] Invalid argument
+Error writing SNP ID 1004119480 for gene ID 100126584: [Errno 22] Invalid argument
+Error writing SNP ID 956269867 for gene ID 100126584: [Errno 22] Invalid argument
+Error writing SNP ID 909766099 for gene ID 100126584: [Errno 22] Invalid argument
+Error writing SNP ID 757690502 for gene ID 100126584: [Errno 22] Invalid argument
+Error writing SNP ID 539300770 for gene ID 100126584: [Errno 22] Invalid argument
+Error writing SNP ID 114417332 for gene ID 100126584: [Errno 22] Invalid argument
+Error writing SNP ID 1414816038 for gene ID 100126447: [Errno 22] Invalid argument
+Error writing SNP ID 1250136823 for gene ID 100126447: [Errno 22] Invalid argument
+Error writing SNP ID 958271220 for gene ID 100126447: [Errno 22] Invalid argument
+Error writing SNP ID 1168689953 for gene ID 100129316: [Errno 22] Invalid argument
+Error writing SNP ID 937573888 for gene ID 100129316: [Errno 22] Invalid argument
+Error writing SNP ID 368414153 for gene ID 100129316: [Errno 22] Invalid argument
+Error writing SNP ID 1340576577 for gene ID 100129215: [Errno 22] Invalid argument
+Error writing SNP ID 1013579192 for gene ID 100129215: [Errno 22] Invalid argument
+Error writing SNP ID 552165477 for gene ID 100129215: [Errno 22] Invalid argument
+Error writing SNP ID 1348791799 for gene ID 100129098: [Errno 22] Invalid argument
+Error writing SNP ID 1025875099 for gene ID 100129098: [Errno 22] Invalid argument
+Error writing SNP ID 529685715 for gene ID 100129098: [Errno 22] Invalid argument
+Error writing SNP ID 1178194412 for gene ID 100128818: [Errno 22] Invalid argument
+Error writing SNP ID 1476717529 for gene ID 100128770: [Errno 22] Invalid argument
+Error writing SNP ID 1298193237 for gene ID 100128770: [Errno 22] Invalid argument
+Error writing SNP ID 1021561998 for gene ID 100128770: [Errno 22] Invalid argument
+Error writing SNP ID 760988656 for gene ID 100128770: [Errno 22] Invalid argument
+Error writing SNP ID 1428970358 for gene ID 100128593: [Errno 22] Invalid argument
+Error writing SNP ID 1179412392 for gene ID 100128593: [Errno 22] Invalid argument
+Error writing SNP ID 573057482 for gene ID 100128593: [Errno 22] Invalid argument
+Error writing SNP ID 1212664435 for gene ID 100128573: [Errno 22] Invalid argument
+Error writing SNP ID 1465316026 for gene ID 100128494: [Errno 22] Invalid argument
+Error writing SNP ID 1250211435 for gene ID 100128494: [Errno 22] Invalid argument
+Error writing SNP ID 778620142 for gene ID 100128494: [Errno 22] Invalid argument
+Error writing SNP ID 376861788 for gene ID 100128494: [Errno 22] Invalid argument
+Error writing SNP ID 1375702769 for gene ID 100128364: [Errno 22] Invalid argument
+Error writing SNP ID 1170374501 for gene ID 100128364: [Errno 22] Invalid argument
+Error writing SNP ID 1486847565 for gene ID 100128340: [Errno 22] Invalid argument
+Error writing SNP ID 1434780581 for gene ID 100128340: [Errno 22] Invalid argument
+Error writing SNP ID 1380171432 for gene ID 100128340: [Errno 22] Invalid argument
+Error writing SNP ID 1321965948 for gene ID 100128340: [Errno 22] Invalid argument
+Error writing SNP ID 1268442373 for gene ID 100128340: [Errno 22] Invalid argument
+Error writing SNP ID 1211745332 for gene ID 100128340: [Errno 22] Invalid argument
+Error writing SNP ID 1158450146 for gene ID 100128340: [Errno 22] Invalid argument
+Error writing SNP ID 945227899 for gene ID 100128340: [Errno 22] Invalid argument
+Error writing SNP ID 562613918 for gene ID 100128340: [Errno 22] Invalid argument
+Error writing SNP ID 113812210 for gene ID 100128340: [Errno 22] Invalid argument
+Error writing SNP ID 1419983823 for gene ID 100128338: [Errno 22] Invalid argument
+Error writing SNP ID 1306603154 for gene ID 100128338: [Errno 22] Invalid argument
+Error writing SNP ID 1195941299 for gene ID 100128338: [Errno 22] Invalid argument
+Error writing SNP ID 940958709 for gene ID 100128338: [Errno 22] Invalid argument
+Error writing SNP ID 531284336 for gene ID 100128338: [Errno 22] Invalid argument
+Error writing SNP ID 1327600424 for gene ID 100128276: [Errno 22] Invalid argument
+Error writing SNP ID 914601310 for gene ID 100128276: [Errno 22] Invalid argument
+Error writing SNP ID 1388102325 for gene ID 100128242: [Errno 22] Invalid argument
+Error writing SNP ID 1046670781 for gene ID 100128242: [Errno 22] Invalid argument
+Error writing SNP ID 187067585 for gene ID 100128242: [Errno 22] Invalid argument
+Error writing SNP ID 1022038 for gene ID 100128059: [Errno 22] Invalid argument
+Error writing SNP ID 1207012948 for gene ID 100128002: [Errno 22] Invalid argument
+Error writing SNP ID 1480734208 for gene ID 100126584: [Errno 22] Invalid argument
+Error writing SNP ID 1433052188 for gene ID 100126584: [Errno 22] Invalid argument
+Error writing SNP ID 1383813550 for gene ID 100126584: [Errno 22] Invalid argument
+Error writing SNP ID 1336828598 for gene ID 100126584: [Errno 22] Invalid argument
+Error writing SNP ID 1282866197 for gene ID 100126584: [Errno 22] Invalid argument
+Error writing SNP ID 1234863863 for gene ID 100126584: [Errno 22] Invalid argument
+Error writing SNP ID 1183687700 for gene ID 100126584: [Errno 22] Invalid argument
+Error writing SNP ID 1037657981 for gene ID 100126584: [Errno 22] Invalid argument
+Error writing SNP ID 995080433 for gene ID 100126584: [Errno 22] Invalid argument
+Error writing SNP ID 947092637 for gene ID 100126584: [Errno 22] Invalid argument
+Error writing SNP ID 900183125 for gene ID 100126584: [Errno 22] Invalid argument
+Error writing SNP ID 746594399 for gene ID 100126584: [Errno 22] Invalid argument
+Error writing SNP ID 529397192 for gene ID 100126584: [Errno 22] Invalid argument
+Error writing SNP ID 73771235 for gene ID 100126584: [Errno 22] Invalid argument
+Error writing SNP ID 1379383050 for gene ID 100126447: [Errno 22] Invalid argument
+Error writing SNP ID 1211903818 for gene ID 100126447: [Errno 22] Invalid argument
+Error writing SNP ID 913589155 for gene ID 100126447: [Errno 22] Invalid argument
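The `[Errno 22] Invalid argument` entries above are the string form of Python's `OSError` with `errno` 22 (EINVAL), raised while writing individual SNP records. The uniform message format suggests the collection script wraps each write in a try/except and continues rather than aborting. A minimal sketch of that pattern follows; `write_snp`, the record layout, and the file handles are assumptions, since the actual collection code is not shown in this diff.

```python
def write_snp(out_handle, log_handle, snp_id, gene_id):
    """Write one SNP record; on failure, log in the error_log.txt format and continue.

    Hypothetical sketch: the real record layout and handles may differ.
    """
    try:
        out_handle.write("rs{}\t{}\n".format(snp_id, gene_id))
    except OSError as exc:  # str(exc) renders as "[Errno 22] Invalid argument"
        log_handle.write(
            "Error writing SNP ID {} for gene ID {}: {}\n".format(snp_id, gene_id, exc)
        )
```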
diff --git a/metadata/metadata.yml b/metadata/metadata.yml
new file mode 100644
index 0000000..3ffef0f
--- /dev/null
+++ b/metadata/metadata.yml
@@ -0,0 +1,31 @@
+metadata_version: 1
+name: Copy of A Literature Embedding Model for Cardiovascular Disease Prediction using
+  Risk Factors, Symptoms, and Genotype Information
+description: We have developed a literature embedding model to identify significant
+  cardiovascular disease (CVD) risk factors and associated information. Our model
+  is trained on literature data and retrieves CVD risk factors and related
+  information for a given query. It can also be used for CVD prediction on cohort
+  data through feature selection (FS) and dimensionality reduction (DR) tasks.
+  This capsule provides all procedures for literature data collection and
+  pre-processing, literature model training, CVD risk factor identification, and
+  FS and DR applications for CVD prediction on cohort data.
+tags:
+- Information Retrieval
+- Knowledge Representation
+- Machine Intelligence
+- cardiovascular-risk
+- Machine Learning
+- Natural Language Processing
+authors:
+- name: Jihye Moon
+  affiliations:
+  - name: University of Connecticut
+- name: Hugo F. Posada-Quintero
+  affiliations:
+  - name: University of Connecticut
+- name: Ki H. Chon
+  affiliations:
+  - name: University of Connecticut
+corresponding_contributor:
+  name: Ki H. Chon
+  email: ki.chon@uconn.edu
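The capsule metadata follows Code Ocean's `metadata_version: 1` layout and can be inspected programmatically. A minimal sketch, assuming PyYAML is available (it is not pinned in the capsule's environment):

```python
import yaml  # assumption: PyYAML installed separately

with open("metadata/metadata.yml") as fh:
    meta = yaml.safe_load(fh)

print(meta["name"])
print([author["name"] for author in meta["authors"]])
print(meta["corresponding_contributor"]["email"])
```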